Mercurial > hg > CbC > CbC_llvm
comparison test/CodeGen/AMDGPU/llvm.amdgcn.class.ll @ 100:7d135dc70f03 LLVM 3.9
LLVM 3.9
author | Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 26 Jan 2016 22:53:40 +0900 |
parents | |
children | 1172e4bd9c6f |
comparison
equal
deleted
inserted
replaced
96:6418606d0ead | 100:7d135dc70f03 |
---|---|
; Codegen test for the llvm.amdgcn.class intrinsics (f32 and f64 variants).
; The RUN line compiles this file with llc for -march=amdgcn -mcpu=SI (with
; -verify-machineinstrs) and pipes the output to FileCheck, which matches the
; directives carrying the "SI" prefix.
1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s | |
2 | |
; Intrinsics called by the test functions in this file; every declaration
; uses attribute group #1.
3 declare i1 @llvm.amdgcn.class.f32(float, i32) #1 | |
4 declare i1 @llvm.amdgcn.class.f64(double, i32) #1 | |
5 declare i32 @llvm.r600.read.tidig.x() #1 | |
6 declare float @llvm.fabs.f32(float) #1 | |
7 declare double @llvm.fabs.f64(double) #1 | |
8 | |
; Basic f32 case: both the tested value %a and the class mask %b are function
; arguments. The checks expect the mask to be copied into a v-register, the
; e32 compare to write vcc, and the select on vcc to produce the stored 0 / -1
; (the sign-extended i1 result).
9 ; SI-LABEL: {{^}}test_class_f32: | |
10 ; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb | |
11 ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc | |
12 ; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] | |
13 ; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[VB]] | |
14 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc | |
15 ; SI-NEXT: buffer_store_dword [[RESULT]] | |
16 ; SI: s_endpgm | |
17 define void @test_class_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 { | |
18 %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 %b) #1 | |
19 %sext = sext i1 %result to i32 | |
20 store i32 %sext, i32 addrspace(1)* %out, align 4 | |
21 ret void | |
22 } | |
23 | |
; Same as the basic f32 case, but the value is passed through llvm.fabs.f32
; first. The checks expect the compare operand to be printed as |SA| on the
; e64 form, with the compare result in an s-register pair that feeds the
; select.
24 ; SI-LABEL: {{^}}test_class_fabs_f32: | |
25 ; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb | |
26 ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc | |
27 ; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] | |
28 ; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SA]]|, [[VB]] | |
29 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] | |
30 ; SI-NEXT: buffer_store_dword [[RESULT]] | |
31 ; SI: s_endpgm | |
32 define void @test_class_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 { | |
33 %a.fabs = call float @llvm.fabs.f32(float %a) #1 | |
34 %result = call i1 @llvm.amdgcn.class.f32(float %a.fabs, i32 %b) #1 | |
35 %sext = sext i1 %result to i32 | |
36 store i32 %sext, i32 addrspace(1)* %out, align 4 | |
37 ret void | |
38 } | |
39 | |
; f32 case with a negated value (written as fsub -0.0, %a). The checks expect
; the compare operand to be printed as -SA on the e64 form, with the compare
; result in an s-register pair that feeds the select.
40 ; SI-LABEL: {{^}}test_class_fneg_f32: | |
41 ; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb | |
42 ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc | |
43 ; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] | |
44 ; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -[[SA]], [[VB]] | |
45 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] | |
46 ; SI-NEXT: buffer_store_dword [[RESULT]] | |
47 ; SI: s_endpgm | |
48 define void @test_class_fneg_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 { | |
49 %a.fneg = fsub float -0.0, %a | |
50 %result = call i1 @llvm.amdgcn.class.f32(float %a.fneg, i32 %b) #1 | |
51 %sext = sext i1 %result to i32 | |
52 store i32 %sext, i32 addrspace(1)* %out, align 4 | |
53 ret void | |
54 } | |
55 | |
; f32 case combining both operations: the value goes through llvm.fabs.f32 and
; is then negated (fsub -0.0, ...). The checks expect the compare operand to be
; printed as -|SA| on the e64 form.
56 ; SI-LABEL: {{^}}test_class_fneg_fabs_f32: | |
57 ; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb | |
58 ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc | |
59 ; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] | |
60 ; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|[[SA]]|, [[VB]] | |
61 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] | |
62 ; SI-NEXT: buffer_store_dword [[RESULT]] | |
63 ; SI: s_endpgm | |
64 define void @test_class_fneg_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 { | |
65 %a.fabs = call float @llvm.fabs.f32(float %a) #1 | |
66 %a.fneg.fabs = fsub float -0.0, %a.fabs | |
67 %result = call i1 @llvm.amdgcn.class.f32(float %a.fneg.fabs, i32 %b) #1 | |
68 %sext = sext i1 %result to i32 | |
69 store i32 %sext, i32 addrspace(1)* %out, align 4 | |
70 ret void | |
71 } | |
72 | |
; f32 case with the constant mask 1. The checks expect the mask to appear as
; the literal last operand of the e64 compare ({{$}} anchors it at end of
; line, so e.g. "10" would not match).
73 ; SI-LABEL: {{^}}test_class_1_f32: | |
74 ; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb | |
75 ; SI: v_cmp_class_f32_e64 [[COND:s\[[0-9]+:[0-9]+\]]], [[SA]], 1{{$}} | |
76 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]] | |
77 ; SI-NEXT: buffer_store_dword [[RESULT]] | |
78 ; SI: s_endpgm | |
79 define void @test_class_1_f32(i32 addrspace(1)* %out, float %a) #0 { | |
80 %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1 | |
81 %sext = sext i1 %result to i32 | |
82 store i32 %sext, i32 addrspace(1)* %out, align 4 | |
83 ret void | |
84 } | |
85 | |
; f32 case with the constant mask 64. The checks expect 64 to appear as the
; literal last operand of the e64 compare, anchored at end of line.
86 ; SI-LABEL: {{^}}test_class_64_f32: | |
87 ; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb | |
88 ; SI: v_cmp_class_f32_e64 [[COND:s\[[0-9]+:[0-9]+\]]], [[SA]], 64{{$}} | |
89 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]] | |
90 ; SI-NEXT: buffer_store_dword [[RESULT]] | |
91 ; SI: s_endpgm | |
92 define void @test_class_64_f32(i32 addrspace(1)* %out, float %a) #0 { | |
93 %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 64) #1 | |
94 %sext = sext i1 %result to i32 | |
95 store i32 %sext, i32 addrspace(1)* %out, align 4 | |
96 ret void | |
97 } | |
98 | |
; f32 case with the constant mask 1023 (0x3ff). The checks expect the mask to
; be materialized into a v-register with v_mov_b32 and then used by the e32
; compare that writes vcc.
99 ; Set all 10 bits of mask | |
100 ; SI-LABEL: {{^}}test_class_full_mask_f32: | |
101 ; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb | |
102 ; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x3ff{{$}} | |
103 ; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[MASK]] | |
104 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc | |
105 ; SI-NEXT: buffer_store_dword [[RESULT]] | |
106 ; SI: s_endpgm | |
107 define void @test_class_full_mask_f32(i32 addrspace(1)* %out, float %a) #0 { | |
108 %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1023) #1 | |
109 %sext = sext i1 %result to i32 | |
110 store i32 %sext, i32 addrspace(1)* %out, align 4 | |
111 ret void | |
112 } | |
113 | |
; f32 case with the constant mask 511 (0x1ff, the low 9 bits). The checks
; expect the mask to be materialized into a v-register and used by the e32
; compare that writes vcc.
114 ; SI-LABEL: {{^}}test_class_9bit_mask_f32: | |
115 ; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb | |
116 ; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}} | |
117 ; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[MASK]] | |
118 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc | |
119 ; SI-NEXT: buffer_store_dword [[RESULT]] | |
120 ; SI: s_endpgm | |
121 define void @test_class_9bit_mask_f32(i32 addrspace(1)* %out, float %a) #0 { | |
122 %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) #1 | |
123 %sext = sext i1 %result to i32 | |
124 store i32 %sext, i32 addrspace(1)* %out, align 4 | |
125 ret void | |
126 } | |
127 | |
; f32 case where the tested value is loaded from memory at an index given by
; llvm.r600.read.tidig.x, so the compare's value operand is a v-register. The
; result is stored at the same index of %out.
; NOTE(review): the name says "full_mask" but the mask is 511 (0x1ff), the same
; value as test_class_9bit_mask_f32, whereas test_class_full_mask_f32 uses 1023
; (0x3ff) - confirm which mask was intended.
128 ; SI-LABEL: {{^}}v_test_class_full_mask_f32: | |
129 ; SI-DAG: buffer_load_dword [[VA:v[0-9]+]] | |
130 ; SI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}} | |
131 ; SI: v_cmp_class_f32_e32 vcc, [[VA]], [[MASK]] | |
132 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc | |
133 ; SI: buffer_store_dword [[RESULT]] | |
134 ; SI: s_endpgm | |
135 define void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { | |
136 %tid = call i32 @llvm.r600.read.tidig.x() #1 | |
137 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid | |
138 %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid | |
139 %a = load float, float addrspace(1)* %gep.in | |
140 | |
141 %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) #1 | |
142 %sext = sext i1 %result to i32 | |
143 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 | |
144 ret void | |
145 } | |
146 | |
; f32 case with a constant value (1.0) and a mask loaded from memory at the
; index given by llvm.r600.read.tidig.x. The checks expect 1.0 to appear
; directly as the value operand of the e32 compare.
147 ; SI-LABEL: {{^}}test_class_inline_imm_constant_dynamic_mask_f32: | |
148 ; SI-DAG: buffer_load_dword [[VB:v[0-9]+]] | |
149 ; SI: v_cmp_class_f32_e32 vcc, 1.0, [[VB]] | |
150 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc | |
151 ; SI: buffer_store_dword [[RESULT]] | |
152 ; SI: s_endpgm | |
153 define void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { | |
154 %tid = call i32 @llvm.r600.read.tidig.x() #1 | |
155 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid | |
156 %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid | |
157 %b = load i32, i32 addrspace(1)* %gep.in | |
158 | |
159 %result = call i1 @llvm.amdgcn.class.f32(float 1.0, i32 %b) #1 | |
160 %sext = sext i1 %result to i32 | |
161 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 | |
162 ret void | |
163 } | |
164 | |
; f32 case with the constant value 1024.0 and a mask loaded from memory. The
; checks currently expect the constant (0x44800000) to be moved into a
; v-register before the compare instead of being used as an operand directly;
; the FIXME below records that as an open question.
165 ; FIXME: Why isn't this using a literal constant operand? | |
166 ; SI-LABEL: {{^}}test_class_lit_constant_dynamic_mask_f32: | |
167 ; SI-DAG: buffer_load_dword [[VB:v[0-9]+]] | |
168 ; SI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000 | |
169 ; SI: v_cmp_class_f32_e32 vcc, [[VK]], [[VB]] | |
170 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc | |
171 ; SI: buffer_store_dword [[RESULT]] | |
172 ; SI: s_endpgm | |
173 define void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { | |
174 %tid = call i32 @llvm.r600.read.tidig.x() #1 | |
175 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid | |
176 %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid | |
177 %b = load i32, i32 addrspace(1)* %gep.in | |
178 | |
179 %result = call i1 @llvm.amdgcn.class.f32(float 1024.0, i32 %b) #1 | |
180 %sext = sext i1 %result to i32 | |
181 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 | |
182 ret void | |
183 } | |
184 | |
; Basic f64 case: the double value and the i32 mask are function arguments.
; The checks expect the value in an s-register pair (s_load_dwordx2 at 0xb),
; the mask loaded from 0xd and copied into a v-register, and the e32 compare
; writing vcc.
185 ; SI-LABEL: {{^}}test_class_f64: | |
186 ; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb | |
187 ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd | |
188 ; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] | |
189 ; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[VB]] | |
190 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc | |
191 ; SI-NEXT: buffer_store_dword [[RESULT]] | |
192 ; SI: s_endpgm | |
193 define void @test_class_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 { | |
194 %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 %b) #1 | |
195 %sext = sext i1 %result to i32 | |
196 store i32 %sext, i32 addrspace(1)* %out, align 4 | |
197 ret void | |
198 } | |
199 | |
; f64 case with the value passed through llvm.fabs.f64 first. The checks
; expect the compare operand to be printed as |SA| on the e64 form, with the
; compare result in an s-register pair that feeds the select.
200 ; SI-LABEL: {{^}}test_class_fabs_f64: | |
201 ; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb | |
202 ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd | |
203 ; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] | |
204 ; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SA]]|, [[VB]] | |
205 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] | |
206 ; SI-NEXT: buffer_store_dword [[RESULT]] | |
207 ; SI: s_endpgm | |
208 define void @test_class_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 { | |
209 %a.fabs = call double @llvm.fabs.f64(double %a) #1 | |
210 %result = call i1 @llvm.amdgcn.class.f64(double %a.fabs, i32 %b) #1 | |
211 %sext = sext i1 %result to i32 | |
212 store i32 %sext, i32 addrspace(1)* %out, align 4 | |
213 ret void | |
214 } | |
215 | |
; f64 case with a negated value (written as fsub -0.0, %a). The checks expect
; the compare operand to be printed as -SA on the e64 form.
216 ; SI-LABEL: {{^}}test_class_fneg_f64: | |
217 ; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb | |
218 ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd | |
219 ; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] | |
220 ; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -[[SA]], [[VB]] | |
221 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] | |
222 ; SI-NEXT: buffer_store_dword [[RESULT]] | |
223 ; SI: s_endpgm | |
224 define void @test_class_fneg_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 { | |
225 %a.fneg = fsub double -0.0, %a | |
226 %result = call i1 @llvm.amdgcn.class.f64(double %a.fneg, i32 %b) #1 | |
227 %sext = sext i1 %result to i32 | |
228 store i32 %sext, i32 addrspace(1)* %out, align 4 | |
229 ret void | |
230 } | |
231 | |
; f64 case combining both operations: the value goes through llvm.fabs.f64 and
; is then negated (fsub -0.0, ...). The checks expect the compare operand to be
; printed as -|SA| on the e64 form.
232 ; SI-LABEL: {{^}}test_class_fneg_fabs_f64: | |
233 ; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb | |
234 ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd | |
235 ; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] | |
236 ; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|[[SA]]|, [[VB]] | |
237 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] | |
238 ; SI-NEXT: buffer_store_dword [[RESULT]] | |
239 ; SI: s_endpgm | |
240 define void @test_class_fneg_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 { | |
241 %a.fabs = call double @llvm.fabs.f64(double %a) #1 | |
242 %a.fneg.fabs = fsub double -0.0, %a.fabs | |
243 %result = call i1 @llvm.amdgcn.class.f64(double %a.fneg.fabs, i32 %b) #1 | |
244 %sext = sext i1 %result to i32 | |
245 store i32 %sext, i32 addrspace(1)* %out, align 4 | |
246 ret void | |
247 } | |
248 | |
; f64 case with the constant mask 1. Only the compare itself is checked: the
; e64 form with s-register-pair destination and value, and the literal 1 as
; the last operand (anchored at end of line).
249 ; SI-LABEL: {{^}}test_class_1_f64: | |
250 ; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 1{{$}} | |
251 ; SI: s_endpgm | |
252 define void @test_class_1_f64(i32 addrspace(1)* %out, double %a) #0 { | |
253 %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 1) #1 | |
254 %sext = sext i1 %result to i32 | |
255 store i32 %sext, i32 addrspace(1)* %out, align 4 | |
256 ret void | |
257 } | |
258 | |
; f64 case with the constant mask 64. Only the compare itself is checked: the
; e64 form with the literal 64 as the last operand (anchored at end of line).
259 ; SI-LABEL: {{^}}test_class_64_f64: | |
260 ; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 64{{$}} | |
261 ; SI: s_endpgm | |
262 define void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 { | |
263 %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 64) #1 | |
264 %sext = sext i1 %result to i32 | |
265 store i32 %sext, i32 addrspace(1)* %out, align 4 | |
266 ret void | |
267 } | |
268 | |
; f64 case with the constant mask 511 (0x1ff). The checks expect the mask to be
; materialized into a v-register and used by the e32 compare writing vcc; the
; SI-NOT line requires that nothing mentioning vcc appears between the compare
; and the select that consumes it.
; NOTE(review): this test sets 9 bits under the name "full_mask", while
; test_class_full_mask_f32 sets 10 bits (1023) - confirm whether 511 is the
; intended full mask here.
269 ; Set all 9 bits of mask | |
270 ; SI-LABEL: {{^}}test_class_full_mask_f64: | |
271 ; SI: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb | |
272 ; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}} | |
273 ; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[MASK]] | |
274 ; SI-NOT: vcc | |
275 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc | |
276 ; SI-NEXT: buffer_store_dword [[RESULT]] | |
277 ; SI: s_endpgm | |
278 define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 { | |
279 %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) #1 | |
280 %sext = sext i1 %result to i32 | |
281 store i32 %sext, i32 addrspace(1)* %out, align 4 | |
282 ret void | |
283 } | |
284 | |
; f64 case where the tested value is loaded from memory at an index given by
; llvm.r600.read.tidig.x, so the compare's value operand is a v-register pair.
; The mask is the constant 511 (0x1ff). The SI-NOT line requires that nothing
; mentioning vcc appears between the compare and the select that consumes it.
285 ; SI-LABEL: {{^}}v_test_class_full_mask_f64: | |
286 ; SI-DAG: buffer_load_dwordx2 [[VA:v\[[0-9]+:[0-9]+\]]] | |
287 ; SI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}} | |
288 ; SI: v_cmp_class_f64_e32 vcc, [[VA]], [[MASK]] | |
289 ; SI-NOT: vcc | |
290 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc | |
291 ; SI: buffer_store_dword [[RESULT]] | |
292 ; SI: s_endpgm | |
293 define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 { | |
294 %tid = call i32 @llvm.r600.read.tidig.x() #1 | |
295 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid | |
296 %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid | |
; Load through the per-index pointer so %gep.in is actually used and the value
; differs per work-item, matching v_test_class_full_mask_f32.
297 %a = load double, double addrspace(1)* %gep.in | |
298 | |
299 %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) #1 | |
300 %sext = sext i1 %result to i32 | |
301 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 | |
302 ret void | |
303 } | |
304 | |
; f64 case with a constant value (1.0) and a mask loaded from memory at the
; index given by llvm.r600.read.tidig.x. The active check only requires an e32
; compare writing vcc.
; NOTE(review): the "XSI:" line is presumably a deliberately disabled check for
; 1.0 appearing directly as the operand (its prefix is not the SI prefix given
; on the RUN line) - confirm whether it can be re-enabled.
305 ; SI-LABEL: {{^}}test_class_inline_imm_constant_dynamic_mask_f64: | |
306 ; XSI: v_cmp_class_f64_e32 vcc, 1.0, | |
307 ; SI: v_cmp_class_f64_e32 vcc, | |
308 ; SI: s_endpgm | |
309 define void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { | |
310 %tid = call i32 @llvm.r600.read.tidig.x() #1 | |
311 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid | |
312 %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid | |
313 %b = load i32, i32 addrspace(1)* %gep.in | |
314 | |
315 %result = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 %b) #1 | |
316 %sext = sext i1 %result to i32 | |
317 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 | |
318 ret void | |
319 } | |
320 | |
; f64 case with the constant value 1024.0 and a mask loaded from memory. The
; check expects the e32 compare to take the value from an s-register pair and
; the mask from a v-register.
321 ; SI-LABEL: {{^}}test_class_lit_constant_dynamic_mask_f64: | |
322 ; SI: v_cmp_class_f64_e32 vcc, s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} | |
323 ; SI: s_endpgm | |
324 define void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { | |
325 %tid = call i32 @llvm.r600.read.tidig.x() #1 | |
326 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid | |
327 %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid | |
328 %b = load i32, i32 addrspace(1)* %gep.in | |
329 | |
330 %result = call i1 @llvm.amdgcn.class.f64(double 1024.0, i32 %b) #1 | |
331 %sext = sext i1 %result to i32 | |
332 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 | |
333 ret void | |
334 } | |
335 | |
; Two class tests on the same value with masks 1 and 3 are or'ed together.
; The checks expect exactly one v_cmp_class in the output, using the combined
; mask 3; the SI-NOT lines reject any other v_cmp_class before or after it.
336 ; SI-LABEL: {{^}}test_fold_or_class_f32_0: | |
337 ; SI-NOT: v_cmp_class | |
338 ; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 3{{$}} | |
339 ; SI-NOT: v_cmp_class | |
340 ; SI: s_endpgm | |
341 define void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { | |
342 %tid = call i32 @llvm.r600.read.tidig.x() #1 | |
343 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid | |
344 %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid | |
345 %a = load float, float addrspace(1)* %gep.in | |
346 | |
347 %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1 | |
348 %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 3) #1 | |
349 %or = or i1 %class0, %class1 | |
350 | |
351 %sext = sext i1 %or to i32 | |
; Store through the per-index pointer so %gep.out is used and work-items do
; not all write their (different) results to the same address.
352 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 | |
353 ret void | |
354 } | |
355 | |
; Three class tests on the same value with masks 1, 2 and 4 are or'ed
; together. The checks expect exactly one v_cmp_class in the output, using the
; combined mask 7.
356 ; SI-LABEL: {{^}}test_fold_or3_class_f32_0: | |
357 ; SI-NOT: v_cmp_class | |
358 ; SI: v_cmp_class_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 7{{$}} | |
359 ; SI-NOT: v_cmp_class | |
360 ; SI: s_endpgm | |
361 define void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { | |
362 %tid = call i32 @llvm.r600.read.tidig.x() #1 | |
363 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid | |
364 %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid | |
365 %a = load float, float addrspace(1)* %gep.in | |
366 | |
367 %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1 | |
368 %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 2) #1 | |
369 %class2 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1 | |
370 %or.0 = or i1 %class0, %class1 | |
371 %or.1 = or i1 %or.0, %class2 | |
372 | |
373 %sext = sext i1 %or.1 to i32 | |
; Store through the per-index pointer so %gep.out is used and work-items do
; not all write their (different) results to the same address.
374 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 | |
375 ret void | |
376 } | |
377 | |
; Ten class tests on the same value, one per single-bit mask from 1 to 512,
; are or'ed together. The checks expect exactly one v_cmp_class in the output,
; using the combined mask 0x3ff materialized in a v-register.
378 ; SI-LABEL: {{^}}test_fold_or_all_tests_class_f32_0: | |
379 ; SI-NOT: v_cmp_class | |
380 ; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x3ff{{$}} | |
381 ; SI: v_cmp_class_f32_e32 vcc, v{{[0-9]+}}, [[MASK]]{{$}} | |
382 ; SI-NOT: v_cmp_class | |
383 ; SI: s_endpgm | |
384 define void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { | |
385 %tid = call i32 @llvm.r600.read.tidig.x() #1 | |
386 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid | |
387 %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid | |
388 %a = load float, float addrspace(1)* %gep.in | |
389 | |
390 %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1 | |
391 %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 2) #1 | |
392 %class2 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1 | |
393 %class3 = call i1 @llvm.amdgcn.class.f32(float %a, i32 8) #1 | |
394 %class4 = call i1 @llvm.amdgcn.class.f32(float %a, i32 16) #1 | |
395 %class5 = call i1 @llvm.amdgcn.class.f32(float %a, i32 32) #1 | |
396 %class6 = call i1 @llvm.amdgcn.class.f32(float %a, i32 64) #1 | |
397 %class7 = call i1 @llvm.amdgcn.class.f32(float %a, i32 128) #1 | |
398 %class8 = call i1 @llvm.amdgcn.class.f32(float %a, i32 256) #1 | |
399 %class9 = call i1 @llvm.amdgcn.class.f32(float %a, i32 512) #1 | |
400 %or.0 = or i1 %class0, %class1 | |
401 %or.1 = or i1 %or.0, %class2 | |
402 %or.2 = or i1 %or.1, %class3 | |
403 %or.3 = or i1 %or.2, %class4 | |
404 %or.4 = or i1 %or.3, %class5 | |
405 %or.5 = or i1 %or.4, %class6 | |
406 %or.6 = or i1 %or.5, %class7 | |
407 %or.7 = or i1 %or.6, %class8 | |
408 %or.8 = or i1 %or.7, %class9 | |
409 %sext = sext i1 %or.8 to i32 | |
; Store through the per-index pointer so %gep.out is used and work-items do
; not all write their (different) results to the same address.
410 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 | |
411 ret void | |
412 } | |
413 | |
; Two class tests on the same value with masks 4 and 8 are or'ed together.
; The checks expect exactly one v_cmp_class in the output, using the combined
; mask 12.
414 ; SI-LABEL: {{^}}test_fold_or_class_f32_1: | |
415 ; SI-NOT: v_cmp_class | |
416 ; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 12{{$}} | |
417 ; SI-NOT: v_cmp_class | |
418 ; SI: s_endpgm | |
419 define void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { | |
420 %tid = call i32 @llvm.r600.read.tidig.x() #1 | |
421 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid | |
422 %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid | |
423 %a = load float, float addrspace(1)* %gep.in | |
424 | |
425 %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1 | |
426 %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 8) #1 | |
427 %or = or i1 %class0, %class1 | |
428 | |
429 %sext = sext i1 %or to i32 | |
; Store through the per-index pointer so %gep.out is used and work-items do
; not all write their (different) results to the same address.
430 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 | |
431 ret void | |
432 } | |
433 | |
; Two identical class tests (same value, same mask 7) are or'ed together. The
; checks expect exactly one v_cmp_class in the output, with mask 7.
434 ; SI-LABEL: {{^}}test_fold_or_class_f32_2: | |
435 ; SI-NOT: v_cmp_class | |
436 ; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 7{{$}} | |
437 ; SI-NOT: v_cmp_class | |
438 ; SI: s_endpgm | |
439 define void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { | |
440 %tid = call i32 @llvm.r600.read.tidig.x() #1 | |
441 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid | |
442 %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid | |
443 %a = load float, float addrspace(1)* %gep.in | |
444 | |
445 %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 7) #1 | |
446 %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 7) #1 | |
447 %or = or i1 %class0, %class1 | |
448 | |
449 %sext = sext i1 %or to i32 | |
; Store through the per-index pointer so %gep.out is used and work-items do
; not all write their (different) results to the same address.
450 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 | |
451 ret void | |
452 } | |
453 | |
; Negative case for the or-folding: the two class tests are applied to
; different values (%a loaded from memory, %b a function argument), so the
; checks expect both compares to remain (masks 4 and 8, one on a v-register
; and one on an s-register) followed by an s_or_b64 combining them.
454 ; SI-LABEL: {{^}}test_no_fold_or_class_f32_0: | |
455 ; SI-DAG: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 4{{$}} | |
456 ; SI-DAG: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 8{{$}} | |
457 ; SI: s_or_b64 | |
458 ; SI: s_endpgm | |
459 define void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in, float %b) #0 { | |
460 %tid = call i32 @llvm.r600.read.tidig.x() #1 | |
461 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid | |
462 %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid | |
463 %a = load float, float addrspace(1)* %gep.in | |
464 | |
465 %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1 | |
466 %class1 = call i1 @llvm.amdgcn.class.f32(float %b, i32 8) #1 | |
467 %or = or i1 %class0, %class1 | |
468 | |
469 %sext = sext i1 %or to i32 | |
; Store through the per-index pointer so %gep.out is used and work-items do
; not all write their (different) results to the same address.
470 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 | |
471 ret void | |
472 } | |
473 | |
; f32 case with the constant mask 0. The checks expect no v_cmp_class before
; the result is materialized, and expect the stored value to be a register set
; directly to 0.
474 ; SI-LABEL: {{^}}test_class_0_f32: | |
475 ; SI-NOT: v_cmp_class | |
476 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}} | |
477 ; SI: buffer_store_dword [[RESULT]] | |
478 ; SI: s_endpgm | |
479 define void @test_class_0_f32(i32 addrspace(1)* %out, float %a) #0 { | |
480 %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 0) #1 | |
481 %sext = sext i1 %result to i32 | |
482 store i32 %sext, i32 addrspace(1)* %out, align 4 | |
483 ret void | |
484 } | |
485 | |
; f64 case with the constant mask 0. The checks expect no v_cmp_class before
; the result is materialized, and expect the stored value to be a register set
; directly to 0.
486 ; SI-LABEL: {{^}}test_class_0_f64: | |
487 ; SI-NOT: v_cmp_class | |
488 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}} | |
489 ; SI: buffer_store_dword [[RESULT]] | |
490 ; SI: s_endpgm | |
491 define void @test_class_0_f64(i32 addrspace(1)* %out, double %a) #0 { | |
492 %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 0) #1 | |
493 %sext = sext i1 %result to i32 | |
494 store i32 %sext, i32 addrspace(1)* %out, align 4 | |
495 ret void | |
496 } | |
497 | |
; Attribute groups: #0 is attached to every function defined in this file,
; #1 to the intrinsic declarations and to the intrinsic call sites.
498 attributes #0 = { nounwind } | |
499 attributes #1 = { nounwind readnone } |