121
|
1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
|
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
3
|
120
|
; Intrinsic returning an i32 workitem id (x dimension). The kernels in this
; file that call it use the result as an element index into their pointers.
4 declare i32 @llvm.amdgcn.workitem.id.x() #1
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
5
|
121
|
6 ; GCN-LABEL: {{^}}v_cnd_nan_nosgpr:
|
|
7 ; GCN: v_cmp_eq_u32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0
|
|
8 ; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]}}, -1, v{{[0-9]+}}, [[COND]]
|
|
9 ; GCN-DAG: v{{[0-9]}}
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
10 ; All nan values are converted to 0xffffffff
|
121
|
11 ; GCN: s_endpgm
|
|
; Stores to %out either the constant 0xFFFFFFFFE0000000 (a NaN, per the
; comment in the check lines of this test) when %c != 0, or the float loaded
; from %fptr at the workitem's x id otherwise.
12 define amdgpu_kernel void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
|
120
|
13 %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
14 %f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx
|
121
|
15 %f = load float, float addrspace(1)* %f.gep
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
16 %setcc = icmp ne i32 %c, 0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
17 %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f ; constant when %c != 0, else the loaded value
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
18 store float %select, float addrspace(1)* %out
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
19 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
20 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
21
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
22
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
23 ; This requires slightly trickier SGPR operand legalization since the
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
24 ; single constant bus SGPR usage is the last operand, and it should
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
25 ; never be moved.
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
26
|
121
|
27 ; GCN-LABEL: {{^}}v_cnd_nan:
|
|
28 ; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0
|
|
29 ; GCN: v_cndmask_b32_e32 v{{[0-9]}}, -1, v{{[0-9]}}, vcc
|
|
30 ; GCN-DAG: v{{[0-9]}}
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
31 ; All nan values are converted to 0xffffffff
|
121
|
32 ; GCN: s_endpgm
|
|
; Stores to %out the constant 0xFFFFFFFFE0000000 (a NaN, per the comment in
; the check lines of this test) when %c != 0, otherwise the kernel argument %f.
; Unlike a loaded operand, both select inputs here come from kernel arguments
; or constants.
33 define amdgpu_kernel void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) #0 {
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
34 %setcc = icmp ne i32 %c, 0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
35 %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f ; constant when %c != 0, else %f
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
36 store float %select, float addrspace(1)* %out
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
37 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
38 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
39
|
121
|
40 ; Test different compare and select operand types for optimal code
|
|
41 ; shrinking.
|
|
42 ; (select (cmp (sgprX, constant)), constant, sgprZ)
|
|
43
|
|
44 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_sgprZ_f32:
|
|
45 ; GCN: s_load_dword [[X:s[0-9]+]]
|
|
46 ; GCN: s_load_dword [[Z:s[0-9]+]]
|
|
47 ; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0
|
|
48 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
|
|
49 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc
|
|
; Stores (%x one 0.0) ? 1.0 : %z to %out indexed by the workitem's x id.
; Both %x and %z are kernel arguments (no loads from memory).
50 define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, float %x, float %z) #0 {
|
|
51 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
52 %tid.ext = sext i32 %tid to i64
|
|
53 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
|
|
54 %setcc = fcmp one float %x, 0.0 ; "one": ordered and not equal
|
|
55 %select = select i1 %setcc, float 1.0, float %z
|
|
56 store float %select, float addrspace(1)* %out.gep
|
|
57 ret void
|
|
58 }
|
|
59
|
|
60 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_sgprX_f32:
|
|
61 ; GCN: s_load_dword [[X:s[0-9]+]]
|
|
62 ; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0
|
|
63 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[X]]
|
|
64 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc
|
|
; Stores (%x one 0.0) ? 1.0 : %x to %out indexed by the workitem's x id.
; The compared kernel argument %x is also the select's false operand; there
; is no separate %z in this kernel.
65 define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(float addrspace(1)* %out, float %x) #0 {
|
|
66 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
67 %tid.ext = sext i32 %tid to i64
|
|
68 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
|
|
69 %setcc = fcmp one float %x, 0.0 ; "one": ordered and not equal
|
|
70 %select = select i1 %setcc, float 1.0, float %x ; %x is reused as the false value
|
|
71 store float %select, float addrspace(1)* %out.gep
|
|
72 ret void
|
|
73 }
|
|
74
|
|
75 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprZ_f32:
|
|
76 ; GCN: s_load_dword [[X:s[0-9]+]]
; GCN: s_load_dword [[Z:s[0-9]+]]
|
|
77 ; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0
|
|
78 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
|
|
79 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VZ]], vcc
|
|
; Stores (%x one 0.0) ? 0.0 : %z to %out indexed by the workitem's x id.
; The select's constant is the same value (0.0) as the compare's constant.
; Both %x and %z are kernel arguments.
80 define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(float addrspace(1)* %out, float %x, float %z) #0 {
|
|
81 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
82 %tid.ext = sext i32 %tid to i64
|
|
83 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
|
|
84 %setcc = fcmp one float %x, 0.0 ; "one": ordered and not equal
|
|
85 %select = select i1 %setcc, float 0.0, float %z
|
|
86 store float %select, float addrspace(1)* %out.gep
|
|
87 ret void
|
|
88 }
|
|
89
|
|
90 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprX_f32:
|
|
91 ; GCN: s_load_dword [[X:s[0-9]+]]
|
|
92 ; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0
|
|
93 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[X]]
|
|
94 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VZ]], vcc
|
|
; Stores (%x one 0.0) ? 0.0 : %x to %out indexed by the workitem's x id.
; The kernel argument %x is both the compared value and the select's false
; operand; the select constant equals the compare constant (0.0).
95 define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(float addrspace(1)* %out, float %x) #0 {
|
|
96 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
97 %tid.ext = sext i32 %tid to i64
|
|
98 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
|
|
99 %setcc = fcmp one float %x, 0.0 ; "one": ordered and not equal
|
|
100 %select = select i1 %setcc, float 0.0, float %x ; %x is reused as the false value
|
|
101 store float %select, float addrspace(1)* %out.gep
|
|
102 ret void
|
|
103 }
|
|
104
|
|
105 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_vgprZ_f32:
|
|
106 ; GCN-DAG: s_load_dword [[X:s[0-9]+]]
|
|
107 ; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
|
|
108 ; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0
|
|
109 ; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 0, [[Z]], [[COND]]
|
|
; Stores (%x one 0.0) ? 0.0 : %z to %out indexed by the workitem's x id.
; %x is a kernel argument; %z is loaded from %z.ptr at the same index.
110 define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 {
|
|
111 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
112 %tid.ext = sext i32 %tid to i64
|
|
113 %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
|
|
114 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
|
|
115 %z = load float, float addrspace(1)* %z.gep
|
|
116 %setcc = fcmp one float %x, 0.0 ; "one": ordered and not equal
|
|
117 %select = select i1 %setcc, float 0.0, float %z
|
|
118 store float %select, float addrspace(1)* %out.gep
|
|
119 ret void
|
|
120 }
|
|
121
|
|
122 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_vgprZ_f32:
|
|
123 ; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
|
|
124 ; GCN-DAG: s_load_dword [[X:s[0-9]+]]
|
|
125 ; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0
|
|
126 ; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 1.0, [[Z]], [[COND]]
|
|
; Stores (%x one 0.0) ? 1.0 : %z to %out indexed by the workitem's x id.
; %x is a kernel argument; %z is loaded from %z.ptr at the same index.
127 define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 {
|
|
128 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
129 %tid.ext = sext i32 %tid to i64
|
|
130 %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
|
|
131 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
|
|
132 %z = load float, float addrspace(1)* %z.gep
|
|
133 %setcc = fcmp one float %x, 0.0 ; "one": ordered and not equal
|
|
134 %select = select i1 %setcc, float 1.0, float %z
|
|
135 store float %select, float addrspace(1)* %out.gep
|
|
136 ret void
|
|
137 }
|
|
138
|
|
139 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_sgprZ_f32:
|
|
140 ; GCN-DAG: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
|
141 ; GCN-DAG: s_load_dword [[Z:s[0-9]+]]
|
|
142 ; GCN-DAG: v_cmp_ngt_f32_e32 vcc, 0, [[X]]
|
|
143 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
|
|
144 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc
|
|
; Stores (%x olt 0.0) ? 1.0 : %z to %out indexed by the workitem's x id.
; %x is loaded from %x.ptr at that index; %z is a kernel argument.
145 define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float %z) #0 {
|
|
146 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
147 %tid.ext = sext i32 %tid to i64
|
|
148 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
|
|
149 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
|
|
150 %x = load float, float addrspace(1)* %x.gep
|
|
151 %setcc = fcmp olt float %x, 0.0 ; "olt": ordered and less than
|
|
152 %select = select i1 %setcc, float 1.0, float %z
|
|
153 store float %select, float addrspace(1)* %out.gep
|
|
154 ret void
|
|
155 }
|
|
156
|
|
157 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_vgprZ_f32:
|
|
158 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
|
159 ; GCN: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
|
|
160 ; GCN: v_cmp_le_f32_e32 vcc, 0, [[X]]
|
|
161 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[Z]], vcc
|
|
; Stores (%x ult 0.0) ? 1.0 : %z to %out indexed by the workitem's x id.
; Both %x and %z are volatile loads from their pointers at that index.
; NOTE(review): the volatile loads presumably pin the load order the check
; lines rely on -- confirm.
162 define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 {
|
|
163 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
164 %tid.ext = sext i32 %tid to i64
|
|
165 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
|
|
166 %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
|
|
167 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
|
|
168 %x = load volatile float, float addrspace(1)* %x.gep
|
|
169 %z = load volatile float, float addrspace(1)* %z.gep
|
|
170 %setcc = fcmp ult float %x, 0.0 ; "ult": unordered or less than
|
|
171 %select = select i1 %setcc, float 1.0, float %z
|
|
172 store float %select, float addrspace(1)* %out.gep
|
|
173 ret void
|
|
174 }
|
|
175
|
|
176 ; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i32:
|
|
177 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
|
178 ; GCN: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
|
|
179 ; GCN: v_cmp_lt_i32_e32 vcc, -1, [[X]]
|
|
180 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 2, [[Z]], vcc
|
|
; Integer variant: stores (%x < 0, signed) ? 2 : %z to %out indexed by the
; workitem's x id. Both %x and %z are volatile i32 loads at that index.
181 define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i32 addrspace(1)* %z.ptr) #0 {
|
|
182 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
183 %tid.ext = sext i32 %tid to i64
|
|
184 %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext
|
|
185 %z.gep = getelementptr inbounds i32, i32 addrspace(1)* %z.ptr, i64 %tid.ext
|
|
186 %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
|
|
187 %x = load volatile i32, i32 addrspace(1)* %x.gep
|
|
188 %z = load volatile i32, i32 addrspace(1)* %z.gep
|
|
189 %setcc = icmp slt i32 %x, 0
|
|
190 %select = select i1 %setcc, i32 2, i32 %z
|
|
191 store i32 %select, i32 addrspace(1)* %out.gep
|
|
192 ret void
|
|
193 }
|
|
194
|
|
195 ; FIXME: Why does VI make the wrong regalloc choice?
|
|
196 ; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i64:
|
|
197 ; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[X_LO:[0-9]+]]:[[X_HI:[0-9]+]]{{\]}}
|
|
198 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[Z_LO:[0-9]+]]:[[Z_HI:[0-9]+]]{{\]}}
|
|
199 ; SI-DAG: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[X_LO]]:[[X_HI]]{{\]}}
|
|
200 ; SI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v[[Z_HI]], vcc
|
|
201 ; SI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2, v[[Z_LO]], vcc
|
|
202
|
|
203 ; VI-DAG: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[X_LO]]:[[X_HI]]{{\]}}
|
|
204 ; VI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v[[Z_HI]], vcc
|
|
205 ; VI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2, v[[Z_LO]], vcc
|
|
; 64-bit integer variant: stores (%x < 0, signed) ? 2 : %z to %out indexed by
; the workitem's x id. Both %x and %z are volatile i64 loads at that index.
206 define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %x.ptr, i64 addrspace(1)* %z.ptr) #0 {
|
|
207 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
208 %tid.ext = sext i32 %tid to i64
|
|
209 %x.gep = getelementptr inbounds i64, i64 addrspace(1)* %x.ptr, i64 %tid.ext
|
|
210 %z.gep = getelementptr inbounds i64, i64 addrspace(1)* %z.ptr, i64 %tid.ext
|
|
211 %out.gep = getelementptr inbounds i64, i64 addrspace(1)* %out, i64 %tid.ext
|
|
212 %x = load volatile i64, i64 addrspace(1)* %x.gep
|
|
213 %z = load volatile i64, i64 addrspace(1)* %z.gep
|
|
214 %setcc = icmp slt i64 %x, 0
|
|
215 %select = select i1 %setcc, i64 2, i64 %z
|
|
216 store i64 %select, i64 addrspace(1)* %out.gep
|
|
217 ret void
|
|
218 }
|
|
219
|
|
220 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_vgprZ_k1_v4f32:
|
|
221 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
|
222 ; GCN: {{buffer|flat}}_load_dwordx4
|
|
223
|
|
224 ; GCN: v_cmp_nge_f32_e32 vcc, 4.0, [[X]]
|
|
225 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
|
|
226 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc
|
|
227 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc
|
|
228 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc
|
|
; Vector select keyed on a scalar compare: stores
; (%x ugt 4.0) ? %z : <1.0, 2.0, -0.5, 4.0> to %out indexed by the workitem's
; x id. Here the constant vector is the FALSE operand of the select.
229 define amdgpu_kernel void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 {
|
|
230 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
231 %tid.ext = sext i32 %tid to i64
|
|
232 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
|
|
233 %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext
|
|
234 %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
|
|
235 %x = load volatile float, float addrspace(1)* %x.gep
|
|
236 %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep
|
|
237 %setcc = fcmp ugt float %x, 4.0 ; "ugt": unordered or greater than
|
|
238 %select = select i1 %setcc, <4 x float> %z, <4 x float> <float 1.0, float 2.0, float -0.5, float 4.0>
|
|
239 store <4 x float> %select, <4 x float> addrspace(1)* %out.gep
|
|
240 ret void
|
|
241 }
|
|
242
|
|
243 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_vgprZ_v4f32:
|
|
244 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
|
245 ; GCN: {{buffer|flat}}_load_dwordx4
|
|
246
|
|
247 ; GCN: v_cmp_ge_f32_e32 vcc, 4.0, [[X]]
|
|
248 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
|
|
249 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc
|
|
250 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc
|
|
251 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc
|
|
; Vector select keyed on a scalar compare: stores
; (%x ugt 4.0) ? <1.0, 2.0, -0.5, 4.0> : %z to %out indexed by the workitem's
; x id. Here the constant vector is the TRUE operand of the select.
252 define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 {
|
|
253 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
254 %tid.ext = sext i32 %tid to i64
|
|
255 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
|
|
256 %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext
|
|
257 %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
|
|
258 %x = load volatile float, float addrspace(1)* %x.gep
|
|
259 %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep
|
|
260 %setcc = fcmp ugt float %x, 4.0 ; "ugt": unordered or greater than
|
|
261 %select = select i1 %setcc, <4 x float> <float 1.0, float 2.0, float -0.5, float 4.0>, <4 x float> %z
|
|
262 store <4 x float> %select, <4 x float> addrspace(1)* %out.gep
|
|
263 ret void
|
|
264 }
|
|
265
|
|
266 ; This must be swapped as a vector type before the condition has
|
|
267 ; multiple uses.
|
|
268
|
|
269 ; GCN-LABEL: {{^}}fcmp_k0_vgprX_select_k1_vgprZ_v4f32:
|
|
270 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
|
271 ; GCN: {{buffer|flat}}_load_dwordx4
|
|
272
|
|
273 ; GCN: v_cmp_le_f32_e32 vcc, 4.0, [[X]]
|
|
274 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
|
|
275 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc
|
|
276 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc
|
|
277 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc
|
|
; Same vector select as the k1_vgprZ_v4f32 form, but the compare is written
; with the constant on the left: (4.0 ugt %x) ? <1.0, 2.0, -0.5, 4.0> : %z.
; The result is stored to %out indexed by the workitem's x id.
278 define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 {
|
|
279 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
280 %tid.ext = sext i32 %tid to i64
|
|
281 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
|
|
282 %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext
|
|
283 %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
|
|
284 %x = load volatile float, float addrspace(1)* %x.gep
|
|
285 %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep
|
|
286 %setcc = fcmp ugt float 4.0, %x ; constant is the LEFT operand here
|
|
287 %select = select i1 %setcc, <4 x float> <float 1.0, float 2.0, float -0.5, float 4.0>, <4 x float> %z
|
|
288 store <4 x float> %select, <4 x float> addrspace(1)* %out.gep
|
|
289 ret void
|
|
290 }
|
|
291
|
|
292 ; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i1:
|
|
293 ; GCN: load_dword
|
|
294 ; GCN: load_ubyte
|
|
295 ; GCN-DAG: v_cmp_gt_i32_e32 vcc, 0, v
|
|
296 ; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 1,
|
|
297 ; GCN-DAG: v_cmp_eq_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1, v
|
|
298 ; GCN-DAG: s_or_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc, s{{\[[0-9]+:[0-9]+\]}}
|
|
299 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s
|
|
300 ; GCN: store_byte
|
|
; i1 variant: stores (%x < 0, signed) ? true : %z to %out indexed by the
; workitem's x id. %x is a volatile i32 load and %z a volatile i1 load at
; that index.
301 define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i1 addrspace(1)* %z.ptr) #0 {
|
|
302 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
303 %tid.ext = sext i32 %tid to i64
|
|
304 %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext
|
|
305 %z.gep = getelementptr inbounds i1, i1 addrspace(1)* %z.ptr, i64 %tid.ext
|
|
306 %out.gep = getelementptr inbounds i1, i1 addrspace(1)* %out, i64 %tid.ext
|
|
307 %x = load volatile i32, i32 addrspace(1)* %x.gep
|
|
308 %z = load volatile i1, i1 addrspace(1)* %z.gep
|
|
309 %setcc = icmp slt i32 %x, 0
|
|
310 %select = select i1 %setcc, i1 true, i1 %z ; logically (%x < 0) or %z
|
|
311 store i1 %select, i1 addrspace(1)* %out.gep
|
|
312 ret void
|
|
313 }
|
|
314
|
|
315 ; Different types compared vs. selected
|
|
316 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_selectf64_k1_vgprZ_f32:
|
|
317 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
|
318 ; GCN: {{buffer|flat}}_load_dwordx2
|
|
319
|
|
320 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3ff00000
|
|
321 ; GCN: v_cmp_le_f32_e32 vcc, 0, [[X]]
|
|
322 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, vcc
|
|
323 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc
|
|
; Compare type differs from select type: a float compare (%x ult 0.0) picks
; between double 1.0 and the double %z. The result is stored to %out indexed
; by the workitem's x id. Both inputs are volatile loads at that index.
324 define amdgpu_kernel void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(double addrspace(1)* %out, float addrspace(1)* %x.ptr, double addrspace(1)* %z.ptr) #0 {
|
|
325 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
326 %tid.ext = sext i32 %tid to i64
|
|
327 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
|
|
328 %z.gep = getelementptr inbounds double, double addrspace(1)* %z.ptr, i64 %tid.ext
|
|
329 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
|
|
330 %x = load volatile float, float addrspace(1)* %x.gep
|
|
331 %z = load volatile double, double addrspace(1)* %z.gep
|
|
332 %setcc = fcmp ult float %x, 0.0 ; "ult": unordered or less than
|
|
333 %select = select i1 %setcc, double 1.0, double %z
|
|
334 store double %select, double addrspace(1)* %out.gep
|
|
335 ret void
|
|
336 }
|
|
337
|
|
338 ; Different types compared vs. selected
|
|
339 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_selecti64_k1_vgprZ_f32:
|
|
340 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
|
341 ; GCN: {{buffer|flat}}_load_dwordx2
|
|
342
|
|
343 ; GCN: v_cmp_nlg_f32_e32 vcc, 0, [[X]]
|
|
344 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 3, v{{[0-9]+}}, vcc
|
|
345 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc
|
|
; Compare type differs from select type: a float compare (%x one 0.0) picks
; between i64 3 and the i64 %z. The result is stored to %out indexed by the
; workitem's x id. Both inputs are volatile loads at that index.
346 define amdgpu_kernel void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(i64 addrspace(1)* %out, float addrspace(1)* %x.ptr, i64 addrspace(1)* %z.ptr) #0 {
|
|
347 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
348 %tid.ext = sext i32 %tid to i64
|
|
349 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
|
|
350 %z.gep = getelementptr inbounds i64, i64 addrspace(1)* %z.ptr, i64 %tid.ext
|
|
351 %out.gep = getelementptr inbounds i64, i64 addrspace(1)* %out, i64 %tid.ext
|
|
352 %x = load volatile float, float addrspace(1)* %x.gep
|
|
353 %z = load volatile i64, i64 addrspace(1)* %z.gep
|
|
354 %setcc = fcmp one float %x, 0.0 ; "one": ordered and not equal
|
|
355 %select = select i1 %setcc, i64 3, i64 %z
|
|
356 store i64 %select, i64 addrspace(1)* %out.gep
|
|
357 ret void
|
|
358 }
|
|
359
|
|
360 ; Different types compared vs. selected
|
|
361 ; GCN-LABEL: {{^}}icmp_vgprX_k0_selectf32_k1_vgprZ_i32:
|
|
362 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
|
363 ; GCN: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
|
|
364
|
|
365 ; GCN: v_cmp_gt_u32_e32 vcc, 2, [[X]]
|
|
366 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, [[Z]], vcc
|
|
; Compare type differs from select type: an unsigned i32 compare (%x > 1)
; picks between float 4.0 and the float %z. The result is stored to %out
; indexed by the workitem's x id. Both inputs are volatile loads at that index.
367 define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(float addrspace(1)* %out, i32 addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 {
|
|
368 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
369 %tid.ext = sext i32 %tid to i64
|
|
370 %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext
|
|
371 %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
|
|
372 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
|
|
373 %x = load volatile i32, i32 addrspace(1)* %x.gep
|
|
374 %z = load volatile float, float addrspace(1)* %z.gep
|
|
375 %setcc = icmp ugt i32 %x, 1
|
|
376 %select = select i1 %setcc, float 4.0, float %z
|
|
377 store float %select, float addrspace(1)* %out.gep
|
|
378 ret void
|
|
379 }
|
|
380
|
|
381 ; FIXME: Should be able to handle multiple uses
|
|
382
|
|
383 ; GCN-LABEL: {{^}}fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2:
|
|
384 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
|
385
|
|
386 ; GCN: v_cmp_nle_f32_e32 vcc, 4.0, [[X]]
|
|
387 ; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -1.0, vcc
|
|
388 ; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -2.0, vcc
|
|
; One compare result feeding two selects: %setcc = (4.0 ugt %x) picks -1.0 or
; %z for %select0 and -2.0 or %z for %select1. Both results are stored with
; volatile stores to the same %out element, so neither store can be dropped.
389 define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 {
|
|
390 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
|
391 %tid.ext = sext i32 %tid to i64
|
|
392 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
|
|
393 %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
|
|
394 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
|
|
395 %x = load volatile float, float addrspace(1)* %x.gep
|
|
396 %z = load volatile float, float addrspace(1)* %z.gep
|
|
397 %setcc = fcmp ugt float 4.0, %x ; single condition, used twice below
|
|
398 %select0 = select i1 %setcc, float -1.0, float %z
|
|
399 %select1 = select i1 %setcc, float -2.0, float %z
|
|
400 store volatile float %select0, float addrspace(1)* %out.gep
|
|
401 store volatile float %select1, float addrspace(1)* %out.gep
|
|
402 ret void
|
|
403 }
|
|
404
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Attribute groups. In the code visible here, #0 is attached to every kernel
; definition, and #1 to the workitem.id.x declaration and each of its calls.
405 attributes #0 = { nounwind }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
406 attributes #1 = { nounwind readnone }
|