annotate llvm/test/CodeGen/AMDGPU/clamp.ll @ 206:f17a3b42b08b

Added tag before-12 for changeset b7591485f4cd
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 07 Jun 2021 21:25:57 +0900
parents 0572611fdcc8
children 2e18cbf3894f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,GFX678 %s
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
2 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,GFX678 %s
150
anatofuz
parents:
diff changeset
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s
anatofuz
parents:
diff changeset
4
anatofuz
parents:
diff changeset
5 ; GCN-LABEL: {{^}}v_clamp_f32:
anatofuz
parents:
diff changeset
6 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
7 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
anatofuz
parents:
diff changeset
8 define amdgpu_kernel void @v_clamp_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
9 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
10 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
11 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
12 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
13 %max = call float @llvm.maxnum.f32(float %a, float 0.0)
anatofuz
parents:
diff changeset
14 %med = call float @llvm.minnum.f32(float %max, float 1.0)
anatofuz
parents:
diff changeset
15
anatofuz
parents:
diff changeset
16 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
17 ret void
anatofuz
parents:
diff changeset
18 }
anatofuz
parents:
diff changeset
19
anatofuz
parents:
diff changeset
20 ; GCN-LABEL: {{^}}v_clamp_neg_f32:
anatofuz
parents:
diff changeset
21 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
22 ; GCN: v_max_f32_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
anatofuz
parents:
diff changeset
23 define amdgpu_kernel void @v_clamp_neg_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
24 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
25 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
26 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
27 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
28 %fneg.a = fsub float -0.0, %a
anatofuz
parents:
diff changeset
29 %max = call float @llvm.maxnum.f32(float %fneg.a, float 0.0)
anatofuz
parents:
diff changeset
30 %med = call float @llvm.minnum.f32(float %max, float 1.0)
anatofuz
parents:
diff changeset
31
anatofuz
parents:
diff changeset
32 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
33 ret void
anatofuz
parents:
diff changeset
34 }
anatofuz
parents:
diff changeset
35
anatofuz
parents:
diff changeset
36 ; GCN-LABEL: {{^}}v_clamp_negabs_f32:
anatofuz
parents:
diff changeset
37 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
38 ; GCN: v_max_f32_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}
anatofuz
parents:
diff changeset
39 define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
40 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
41 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
42 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
43 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
44 %fabs.a = call float @llvm.fabs.f32(float %a)
anatofuz
parents:
diff changeset
45 %fneg.fabs.a = fsub float -0.0, %fabs.a
anatofuz
parents:
diff changeset
46
anatofuz
parents:
diff changeset
47 %max = call float @llvm.maxnum.f32(float %fneg.fabs.a, float 0.0)
anatofuz
parents:
diff changeset
48 %med = call float @llvm.minnum.f32(float %max, float 1.0)
anatofuz
parents:
diff changeset
49
anatofuz
parents:
diff changeset
50 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
51 ret void
anatofuz
parents:
diff changeset
52 }
anatofuz
parents:
diff changeset
53
anatofuz
parents:
diff changeset
54 ; GCN-LABEL: {{^}}v_clamp_negzero_f32:
anatofuz
parents:
diff changeset
55 ; GCN-DAG: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
56 ; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], 0.5, [[A]]
anatofuz
parents:
diff changeset
57 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0x80000000, [[ADD]]
anatofuz
parents:
diff changeset
58 ; GCN: v_min_f32_e32 v{{[0-9]+}}, 1.0, [[MAX]]
anatofuz
parents:
diff changeset
59 define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
60 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
61 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
62 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
63 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
64 %add = fadd nnan float %a, 0.5
anatofuz
parents:
diff changeset
65 %max = call float @llvm.maxnum.f32(float %add, float -0.0)
anatofuz
parents:
diff changeset
66 %med = call float @llvm.minnum.f32(float %max, float 1.0)
anatofuz
parents:
diff changeset
67
anatofuz
parents:
diff changeset
68 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
69 ret void
anatofuz
parents:
diff changeset
70 }
anatofuz
parents:
diff changeset
71
anatofuz
parents:
diff changeset
72 ; FIXME: Weird inconsistency in how -0.0 is treated. It is accepted if the
anatofuz
parents:
diff changeset
73 ; clamp is matched through med3, but not if it is matched directly. Is this correct?
anatofuz
parents:
diff changeset
74
anatofuz
parents:
diff changeset
75 ; GCN-LABEL: {{^}}v_clamp_negzero_maybe_snan_f32:
anatofuz
parents:
diff changeset
76 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
77 ; GFX678: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
78 ; GFX9: v_max_f32_e32 [[QUIET:v[0-9]+]], [[A]], [[A]]
150
anatofuz
parents:
diff changeset
79 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0x80000000, [[QUIET]]
anatofuz
parents:
diff changeset
80 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
anatofuz
parents:
diff changeset
81 define amdgpu_kernel void @v_clamp_negzero_maybe_snan_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
82 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
83 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
84 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
85 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
86 %max = call float @llvm.maxnum.f32(float %a, float -0.0)
anatofuz
parents:
diff changeset
87 %med = call float @llvm.minnum.f32(float %max, float 1.0)
anatofuz
parents:
diff changeset
88
anatofuz
parents:
diff changeset
89 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
90 ret void
anatofuz
parents:
diff changeset
91 }
anatofuz
parents:
diff changeset
92
anatofuz
parents:
diff changeset
93 ; GCN-LABEL: {{^}}v_clamp_multi_use_max_f32:
anatofuz
parents:
diff changeset
94 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
95 ; GFX678: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
96 ; GFX9: v_max_f32_e32 [[QUIET_A:v[0-9]+]], [[A]], [[A]]
150
anatofuz
parents:
diff changeset
97 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[QUIET_A]]
anatofuz
parents:
diff changeset
98 ; GCN: v_min_f32_e32 [[MED:v[0-9]+]], 1.0, [[MAX]]
anatofuz
parents:
diff changeset
99 ; GCN-NOT: [[MAX]]
anatofuz
parents:
diff changeset
100 ; GCN-NOT: [[MED]]
anatofuz
parents:
diff changeset
101
anatofuz
parents:
diff changeset
102 ; SI: buffer_store_dword [[MED]]
anatofuz
parents:
diff changeset
103 ; SI: buffer_store_dword [[MAX]]
anatofuz
parents:
diff changeset
104
anatofuz
parents:
diff changeset
105 ; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MED]]
anatofuz
parents:
diff changeset
106 ; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX]]
anatofuz
parents:
diff changeset
107 define amdgpu_kernel void @v_clamp_multi_use_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
108 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
109 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
110 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
111 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
112 %max = call float @llvm.maxnum.f32(float %a, float 0.0)
anatofuz
parents:
diff changeset
113 %med = call float @llvm.minnum.f32(float %max, float 1.0)
anatofuz
parents:
diff changeset
114
anatofuz
parents:
diff changeset
115 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
116 store volatile float %max, float addrspace(1)* undef
anatofuz
parents:
diff changeset
117 ret void
anatofuz
parents:
diff changeset
118 }
anatofuz
parents:
diff changeset
119
anatofuz
parents:
diff changeset
120 ; GCN-LABEL: {{^}}v_clamp_f16:
anatofuz
parents:
diff changeset
121 ; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
122 ; GFX89: v_max_f16_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
anatofuz
parents:
diff changeset
123
anatofuz
parents:
diff changeset
124 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], [[A]] clamp{{$}}
anatofuz
parents:
diff changeset
125 ; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
anatofuz
parents:
diff changeset
126 define amdgpu_kernel void @v_clamp_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
127 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
128 %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
129 %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
130 %a = load half, half addrspace(1)* %gep0
anatofuz
parents:
diff changeset
131 %max = call half @llvm.maxnum.f16(half %a, half 0.0)
anatofuz
parents:
diff changeset
132 %med = call half @llvm.minnum.f16(half %max, half 1.0)
anatofuz
parents:
diff changeset
133
anatofuz
parents:
diff changeset
134 store half %med, half addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
135 ret void
anatofuz
parents:
diff changeset
136 }
anatofuz
parents:
diff changeset
137
anatofuz
parents:
diff changeset
138 ; GCN-LABEL: {{^}}v_clamp_neg_f16:
anatofuz
parents:
diff changeset
139 ; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
140 ; GFX89: v_max_f16_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
anatofuz
parents:
diff changeset
141
anatofuz
parents:
diff changeset
142 ; FIXME: Better to fold neg into max
anatofuz
parents:
diff changeset
143 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]] clamp{{$}}
anatofuz
parents:
diff changeset
144 ; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
anatofuz
parents:
diff changeset
145 define amdgpu_kernel void @v_clamp_neg_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
146 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
147 %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
148 %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
149 %a = load half, half addrspace(1)* %gep0
anatofuz
parents:
diff changeset
150 %fneg.a = fsub half -0.0, %a
anatofuz
parents:
diff changeset
151 %max = call half @llvm.maxnum.f16(half %fneg.a, half 0.0)
anatofuz
parents:
diff changeset
152 %med = call half @llvm.minnum.f16(half %max, half 1.0)
anatofuz
parents:
diff changeset
153
anatofuz
parents:
diff changeset
154 store half %med, half addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
155 ret void
anatofuz
parents:
diff changeset
156 }
anatofuz
parents:
diff changeset
157
anatofuz
parents:
diff changeset
158 ; GCN-LABEL: {{^}}v_clamp_negabs_f16:
anatofuz
parents:
diff changeset
159 ; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
160 ; GFX89: v_max_f16_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}
anatofuz
parents:
diff changeset
161
anatofuz
parents:
diff changeset
162 ; FIXME: Better to fold neg/abs into max
anatofuz
parents:
diff changeset
163
anatofuz
parents:
diff changeset
164 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[A]]| clamp{{$}}
anatofuz
parents:
diff changeset
165 ; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
anatofuz
parents:
diff changeset
166 define amdgpu_kernel void @v_clamp_negabs_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
167 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
168 %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
169 %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
170 %a = load half, half addrspace(1)* %gep0
anatofuz
parents:
diff changeset
171 %fabs.a = call half @llvm.fabs.f16(half %a)
anatofuz
parents:
diff changeset
172 %fneg.fabs.a = fsub half -0.0, %fabs.a
anatofuz
parents:
diff changeset
173
anatofuz
parents:
diff changeset
174 %max = call half @llvm.maxnum.f16(half %fneg.fabs.a, half 0.0)
anatofuz
parents:
diff changeset
175 %med = call half @llvm.minnum.f16(half %max, half 1.0)
anatofuz
parents:
diff changeset
176
anatofuz
parents:
diff changeset
177 store half %med, half addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
178 ret void
anatofuz
parents:
diff changeset
179 }
anatofuz
parents:
diff changeset
180
anatofuz
parents:
diff changeset
181 ; FIXME: Do f64 instructions support clamp?
anatofuz
parents:
diff changeset
182 ; GCN-LABEL: {{^}}v_clamp_f64:
anatofuz
parents:
diff changeset
183 ; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
anatofuz
parents:
diff changeset
184 ; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, [[A]], [[A]] clamp{{$}}
anatofuz
parents:
diff changeset
185 define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
186 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
187 %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
188 %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
189 %a = load double, double addrspace(1)* %gep0
anatofuz
parents:
diff changeset
190 %max = call double @llvm.maxnum.f64(double %a, double 0.0)
anatofuz
parents:
diff changeset
191 %med = call double @llvm.minnum.f64(double %max, double 1.0)
anatofuz
parents:
diff changeset
192
anatofuz
parents:
diff changeset
193 store double %med, double addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
194 ret void
anatofuz
parents:
diff changeset
195 }
anatofuz
parents:
diff changeset
196
anatofuz
parents:
diff changeset
197 ; GCN-LABEL: {{^}}v_clamp_neg_f64:
anatofuz
parents:
diff changeset
198 ; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
anatofuz
parents:
diff changeset
199 ; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -[[A]], -[[A]] clamp{{$}}
anatofuz
parents:
diff changeset
200 define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
201 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
202 %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
203 %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
204 %a = load double, double addrspace(1)* %gep0
anatofuz
parents:
diff changeset
205 %fneg.a = fsub double -0.0, %a
anatofuz
parents:
diff changeset
206 %max = call double @llvm.maxnum.f64(double %fneg.a, double 0.0)
anatofuz
parents:
diff changeset
207 %med = call double @llvm.minnum.f64(double %max, double 1.0)
anatofuz
parents:
diff changeset
208
anatofuz
parents:
diff changeset
209 store double %med, double addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
210 ret void
anatofuz
parents:
diff changeset
211 }
anatofuz
parents:
diff changeset
212
anatofuz
parents:
diff changeset
213 ; GCN-LABEL: {{^}}v_clamp_negabs_f64:
anatofuz
parents:
diff changeset
214 ; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
anatofuz
parents:
diff changeset
215 ; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -|[[A]]|, -|[[A]]| clamp{{$}}
anatofuz
parents:
diff changeset
216 define amdgpu_kernel void @v_clamp_negabs_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
217 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
218 %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
219 %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
220 %a = load double, double addrspace(1)* %gep0
anatofuz
parents:
diff changeset
221 %fabs.a = call double @llvm.fabs.f64(double %a)
anatofuz
parents:
diff changeset
222 %fneg.fabs.a = fsub double -0.0, %fabs.a
anatofuz
parents:
diff changeset
223
anatofuz
parents:
diff changeset
224 %max = call double @llvm.maxnum.f64(double %fneg.fabs.a, double 0.0)
anatofuz
parents:
diff changeset
225 %med = call double @llvm.minnum.f64(double %max, double 1.0)
anatofuz
parents:
diff changeset
226
anatofuz
parents:
diff changeset
227 store double %med, double addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
228 ret void
anatofuz
parents:
diff changeset
229 }
anatofuz
parents:
diff changeset
230
anatofuz
parents:
diff changeset
231 ; GCN-LABEL: {{^}}v_clamp_med3_aby_negzero_f32:
anatofuz
parents:
diff changeset
232 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
233 ; GCN: v_med3_f32
anatofuz
parents:
diff changeset
234 define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
235 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
236 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
237 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
238 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
239 %med = call float @llvm.amdgcn.fmed3.f32(float -0.0, float 1.0, float %a)
anatofuz
parents:
diff changeset
240 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
241 ret void
anatofuz
parents:
diff changeset
242 }
anatofuz
parents:
diff changeset
243
anatofuz
parents:
diff changeset
244 ; GCN-LABEL: {{^}}v_clamp_med3_aby_f32:
anatofuz
parents:
diff changeset
245 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
246 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
anatofuz
parents:
diff changeset
247 define amdgpu_kernel void @v_clamp_med3_aby_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
248 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
249 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
250 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
251 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
252 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
anatofuz
parents:
diff changeset
253 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
254 ret void
anatofuz
parents:
diff changeset
255 }
anatofuz
parents:
diff changeset
256
anatofuz
parents:
diff changeset
257 ; GCN-LABEL: {{^}}v_clamp_med3_bay_f32:
anatofuz
parents:
diff changeset
258 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
259 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
anatofuz
parents:
diff changeset
260 define amdgpu_kernel void @v_clamp_med3_bay_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
261 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
262 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
263 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
264 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
265 %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
anatofuz
parents:
diff changeset
266 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
267 ret void
anatofuz
parents:
diff changeset
268 }
anatofuz
parents:
diff changeset
269
anatofuz
parents:
diff changeset
270 ; GCN-LABEL: {{^}}v_clamp_med3_yab_f32:
anatofuz
parents:
diff changeset
271 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
272 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
anatofuz
parents:
diff changeset
273 define amdgpu_kernel void @v_clamp_med3_yab_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
274 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
275 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
276 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
277 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
278 %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
anatofuz
parents:
diff changeset
279 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
280 ret void
anatofuz
parents:
diff changeset
281 }
anatofuz
parents:
diff changeset
282
anatofuz
parents:
diff changeset
283 ; GCN-LABEL: {{^}}v_clamp_med3_yba_f32:
anatofuz
parents:
diff changeset
284 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
285 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
anatofuz
parents:
diff changeset
286 define amdgpu_kernel void @v_clamp_med3_yba_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
287 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
288 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
289 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
290 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
291 %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
anatofuz
parents:
diff changeset
292 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
293 ret void
anatofuz
parents:
diff changeset
294 }
anatofuz
parents:
diff changeset
295
anatofuz
parents:
diff changeset
296 ; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32:
anatofuz
parents:
diff changeset
297 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
298 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
anatofuz
parents:
diff changeset
299 define amdgpu_kernel void @v_clamp_med3_ayb_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
300 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
301 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
302 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
303 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
304 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
anatofuz
parents:
diff changeset
305 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
306 ret void
anatofuz
parents:
diff changeset
307 }
anatofuz
parents:
diff changeset
308
anatofuz
parents:
diff changeset
309 ; GCN-LABEL: {{^}}v_clamp_med3_bya_f32:
anatofuz
parents:
diff changeset
310 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
311 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
anatofuz
parents:
diff changeset
312 define amdgpu_kernel void @v_clamp_med3_bya_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
313 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
314 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
315 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
316 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
317 %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
anatofuz
parents:
diff changeset
318 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
319 ret void
anatofuz
parents:
diff changeset
320 }
anatofuz
parents:
diff changeset
321
anatofuz
parents:
diff changeset
322 ; GCN-LABEL: {{^}}v_clamp_constants_to_one_f32:
anatofuz
parents:
diff changeset
323 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 1.0
anatofuz
parents:
diff changeset
324 define amdgpu_kernel void @v_clamp_constants_to_one_f32(float addrspace(1)* %out) #0 {
anatofuz
parents:
diff changeset
325 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
326 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
327 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 4.0)
anatofuz
parents:
diff changeset
328 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
329 ret void
anatofuz
parents:
diff changeset
330 }
anatofuz
parents:
diff changeset
331
anatofuz
parents:
diff changeset
332 ; GCN-LABEL: {{^}}v_clamp_constants_to_zero_f32:
anatofuz
parents:
diff changeset
333 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
anatofuz
parents:
diff changeset
334 define amdgpu_kernel void @v_clamp_constants_to_zero_f32(float addrspace(1)* %out) #0 {
anatofuz
parents:
diff changeset
335 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
336 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
337 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float -4.0)
anatofuz
parents:
diff changeset
338 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
339 ret void
anatofuz
parents:
diff changeset
340 }
anatofuz
parents:
diff changeset
341
anatofuz
parents:
diff changeset
342 ; GCN-LABEL: {{^}}v_clamp_constant_preserve_f32:
anatofuz
parents:
diff changeset
343 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0.5
anatofuz
parents:
diff changeset
344 define amdgpu_kernel void @v_clamp_constant_preserve_f32(float addrspace(1)* %out) #0 {
anatofuz
parents:
diff changeset
345 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
346 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
347 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0.5)
anatofuz
parents:
diff changeset
348 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
349 ret void
anatofuz
parents:
diff changeset
350 }
anatofuz
parents:
diff changeset
351
anatofuz
parents:
diff changeset
352 ; GCN-LABEL: {{^}}v_clamp_constant_preserve_denorm_f32:
anatofuz
parents:
diff changeset
353 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fffff{{$}}
anatofuz
parents:
diff changeset
354 define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(float addrspace(1)* %out) #0 {
anatofuz
parents:
diff changeset
355 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
356 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
357 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 8388607 to float))
anatofuz
parents:
diff changeset
358 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
359 ret void
anatofuz
parents:
diff changeset
360 }
anatofuz
parents:
diff changeset
361
anatofuz
parents:
diff changeset
362 ; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32:
anatofuz
parents:
diff changeset
363 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
anatofuz
parents:
diff changeset
364 define amdgpu_kernel void @v_clamp_constant_qnan_f32(float addrspace(1)* %out) #0 {
anatofuz
parents:
diff changeset
365 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
366 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
367 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
anatofuz
parents:
diff changeset
368 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
369 ret void
anatofuz
parents:
diff changeset
370 }
anatofuz
parents:
diff changeset
371
anatofuz
parents:
diff changeset
372 ; GCN-LABEL: {{^}}v_clamp_constant_snan_f32:
anatofuz
parents:
diff changeset
373 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
anatofuz
parents:
diff changeset
374 define amdgpu_kernel void @v_clamp_constant_snan_f32(float addrspace(1)* %out) #0 {
anatofuz
parents:
diff changeset
375 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
376 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
377 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
anatofuz
parents:
diff changeset
378 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
379 ret void
anatofuz
parents:
diff changeset
380 }
anatofuz
parents:
diff changeset
381
anatofuz
parents:
diff changeset
382 ; ---------------------------------------------------------------------
anatofuz
parents:
diff changeset
383 ; Test non-default behaviors enabling snans and disabling dx10_clamp
anatofuz
parents:
diff changeset
384 ; ---------------------------------------------------------------------
anatofuz
parents:
diff changeset
385
anatofuz
parents:
diff changeset
; maxnum(x, 0.0) followed by minnum(., 1.0), where x = fadd nnan a, 0.5.
; Uses attribute group #2 ("amdgpu-dx10-clamp"="false"); the checks expect a
; v_med3_f32 on the add result rather than a clamp modifier.
386 ; GCN-LABEL: {{^}}v_clamp_f32_no_dx10_clamp:
anatofuz
parents:
diff changeset
387 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
388 ; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 0.5, [[A]]
anatofuz
parents:
diff changeset
389 ; GCN: v_med3_f32 v{{[0-9]+}}, [[ADD]], 0, 1.0
anatofuz
parents:
diff changeset
390 define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
anatofuz
parents:
diff changeset
391 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
392 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
393 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
394 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
395 %a.nnan = fadd nnan float %a, 0.5
anatofuz
parents:
diff changeset
396 %max = call float @llvm.maxnum.f32(float %a.nnan, float 0.0)
anatofuz
parents:
diff changeset
397 %med = call float @llvm.minnum.f32(float %max, float 1.0)
anatofuz
parents:
diff changeset
398
anatofuz
parents:
diff changeset
399 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
400 ret void
anatofuz
parents:
diff changeset
401 }
anatofuz
parents:
diff changeset
402
anatofuz
parents:
diff changeset
; maxnum/minnum clamp of a plain fadd (no nnan flag) under attribute group #3
; ("amdgpu-dx10-clamp"="true", +fp-exceptions). The check expects the clamp to
; appear as a modifier on the v_add_f32_e64 itself.
403 ; GCN-LABEL: {{^}}v_clamp_f32_snan_dx10clamp:
anatofuz
parents:
diff changeset
404 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
405 ; GCN: v_add_f32_e64 [[ADD:v[0-9]+]], [[A]], 0.5 clamp{{$}}
anatofuz
parents:
diff changeset
406 define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #3 {
anatofuz
parents:
diff changeset
407 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
408 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
409 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
410 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
411 %add = fadd float %a, 0.5
anatofuz
parents:
diff changeset
412 %max = call float @llvm.maxnum.f32(float %add, float 0.0)
anatofuz
parents:
diff changeset
413 %med = call float @llvm.minnum.f32(float %max, float 1.0)
anatofuz
parents:
diff changeset
414
anatofuz
parents:
diff changeset
415 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
416 ret void
anatofuz
parents:
diff changeset
417 }
anatofuz
parents:
diff changeset
418
anatofuz
parents:
diff changeset
; maxnum/minnum applied directly to the loaded value under attribute group #4
; ("amdgpu-dx10-clamp"="false", +fp-exceptions). The checks expect an extra
; instruction on the loaded value first (v_mul by 1.0 under the GFX678 prefix,
; v_max of the value with itself under the GFX9 prefix); its result, captured
; as QUIET_A, is the first operand of v_med3_f32.
419 ; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp:
anatofuz
parents:
diff changeset
420 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
421 ; GFX678: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
422 ; GFX9: v_max_f32_e32 [[QUIET_A:v[0-9]+]], [[A]], [[A]]
150
anatofuz
parents:
diff changeset
423 ; GCN: v_med3_f32 {{v[0-9]+}}, [[QUIET_A]], 0, 1.0
anatofuz
parents:
diff changeset
424 define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
anatofuz
parents:
diff changeset
425 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
426 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
427 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
428 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
429 %max = call float @llvm.maxnum.f32(float %a, float 0.0)
anatofuz
parents:
diff changeset
430 %med = call float @llvm.minnum.f32(float %max, float 1.0)
anatofuz
parents:
diff changeset
431
anatofuz
parents:
diff changeset
432 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
433 ret void
anatofuz
parents:
diff changeset
434 }
anatofuz
parents:
diff changeset
435
anatofuz
parents:
diff changeset
; Attribute group #4 ("amdgpu-dx10-clamp"="false", +fp-exceptions), with the
; clamped value produced by fadd nnan a, 1.0. The checks expect a v_add_f32
; whose result is the first operand of v_med3_f32.
436 ; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp_nnan_src:
anatofuz
parents:
diff changeset
437 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
438 ; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, [[A]]
anatofuz
parents:
diff changeset
439 ; GCN: v_med3_f32 v{{[0-9]+}}, [[ADD]], 0, 1.0
anatofuz
parents:
diff changeset
440 define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
anatofuz
parents:
diff changeset
441 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
442 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
443 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
444 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
445 %add = fadd nnan float %a, 1.0
anatofuz
parents:
diff changeset
446 %max = call float @llvm.maxnum.f32(float %add, float 0.0)
anatofuz
parents:
diff changeset
447 %med = call float @llvm.minnum.f32(float %max, float 1.0)
anatofuz
parents:
diff changeset
448
anatofuz
parents:
diff changeset
449 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
450 ret void
anatofuz
parents:
diff changeset
451 }
anatofuz
parents:
diff changeset
452
anatofuz
parents:
diff changeset
; fmed3 with operand order (0.0, 1.0, a) under attribute group #2
; ("amdgpu-dx10-clamp"="false"). The check expects v_max_f32_e64 of the loaded
; value with itself, carrying the clamp modifier.
453 ; GCN-LABEL: {{^}}v_clamp_med3_aby_f32_no_dx10_clamp:
anatofuz
parents:
diff changeset
454 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
455 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
anatofuz
parents:
diff changeset
456 define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
anatofuz
parents:
diff changeset
457 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
458 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
459 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
460 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
461 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
anatofuz
parents:
diff changeset
462 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
463 ret void
anatofuz
parents:
diff changeset
464 }
anatofuz
parents:
diff changeset
465
anatofuz
parents:
diff changeset
; fmed3 with operand order (1.0, 0.0, a) under attribute group #2
; ("amdgpu-dx10-clamp"="false"). The check expects v_max_f32_e64 of the loaded
; value with itself, carrying the clamp modifier.
466 ; GCN-LABEL: {{^}}v_clamp_med3_bay_f32_no_dx10_clamp:
anatofuz
parents:
diff changeset
467 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
468 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
anatofuz
parents:
diff changeset
469 define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
anatofuz
parents:
diff changeset
470 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
471 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
472 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
473 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
474 %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
anatofuz
parents:
diff changeset
475 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
476 ret void
anatofuz
parents:
diff changeset
477 }
anatofuz
parents:
diff changeset
478
anatofuz
parents:
diff changeset
; fmed3 with operand order (a, 0.0, 1.0) under attribute group #2
; ("amdgpu-dx10-clamp"="false"). The check expects a v_med3_f32 with the
; operands in that same order (a, 0, 1.0) and no clamp modifier.
479 ; GCN-LABEL: {{^}}v_clamp_med3_yab_f32_no_dx10_clamp:
anatofuz
parents:
diff changeset
480 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
481 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
anatofuz
parents:
diff changeset
482 define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
anatofuz
parents:
diff changeset
483 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
484 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
485 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
486 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
487 %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
anatofuz
parents:
diff changeset
488 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
489 ret void
anatofuz
parents:
diff changeset
490 }
anatofuz
parents:
diff changeset
491
anatofuz
parents:
diff changeset
; fmed3 with operand order (a, 1.0, 0.0) under attribute group #2
; ("amdgpu-dx10-clamp"="false"). The check expects a v_med3_f32 with the
; operands in that same order (a, 1.0, 0).
492 ; GCN-LABEL: {{^}}v_clamp_med3_yba_f32_no_dx10_clamp:
anatofuz
parents:
diff changeset
493 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
494 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 1.0, 0
anatofuz
parents:
diff changeset
495 define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
anatofuz
parents:
diff changeset
496 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
497 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
498 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
499 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
500 %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
anatofuz
parents:
diff changeset
501 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
502 ret void
anatofuz
parents:
diff changeset
503 }
anatofuz
parents:
diff changeset
504
anatofuz
parents:
diff changeset
; fmed3 with operand order (0.0, a, 1.0) under attribute group #2
; ("amdgpu-dx10-clamp"="false"). The check expects a v_med3_f32 with the
; operands in that same order (0, a, 1.0).
505 ; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32_no_dx10_clamp:
anatofuz
parents:
diff changeset
506 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
507 ; GCN: v_med3_f32 v{{[0-9]+}}, 0, [[A]], 1.0
anatofuz
parents:
diff changeset
508 define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
anatofuz
parents:
diff changeset
509 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
510 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
511 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
512 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
513 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
anatofuz
parents:
diff changeset
514 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
515 ret void
anatofuz
parents:
diff changeset
516 }
anatofuz
parents:
diff changeset
517
anatofuz
parents:
diff changeset
; fmed3 with operand order (1.0, a, 0.0) under attribute group #2
; ("amdgpu-dx10-clamp"="false"). The check expects a v_med3_f32 with the
; operands in that same order (1.0, a, 0).
518 ; GCN-LABEL: {{^}}v_clamp_med3_bya_f32_no_dx10_clamp:
anatofuz
parents:
diff changeset
519 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
520 ; GCN: v_med3_f32 v{{[0-9]+}}, 1.0, [[A]], 0
anatofuz
parents:
diff changeset
521 define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
anatofuz
parents:
diff changeset
522 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
523 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
524 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
525 %a = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
526 %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
anatofuz
parents:
diff changeset
527 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
528 ret void
anatofuz
parents:
diff changeset
529 }
anatofuz
parents:
diff changeset
530
anatofuz
parents:
diff changeset
; fmed3(0.0, 1.0, NaN literal 0x7FF8000000000000) under attribute group #2
; ("amdgpu-dx10-clamp"="false"). The check expects the result register to be
; loaded with the bit pattern 0x7fc00000.
531 ; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32_no_dx10_clamp:
anatofuz
parents:
diff changeset
532 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fc00000
anatofuz
parents:
diff changeset
533 define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
anatofuz
parents:
diff changeset
534 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
535 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
536 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
anatofuz
parents:
diff changeset
537 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
538 ret void
anatofuz
parents:
diff changeset
539 }
anatofuz
parents:
diff changeset
540
anatofuz
parents:
diff changeset
; fmed3(0.0, 1.0, NaN) with the NaN built by bitcasting i32 2139095041
; (0x7F800001) to float, under attribute group #2
; ("amdgpu-dx10-clamp"="false"). The check expects the result register to be
; loaded with that same bit pattern, 0x7f800001.
541 ; GCN-LABEL: {{^}}v_clamp_constant_snan_f32_no_dx10_clamp:
anatofuz
parents:
diff changeset
542 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7f800001
anatofuz
parents:
diff changeset
543 define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
anatofuz
parents:
diff changeset
544 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
545 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
546 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
anatofuz
parents:
diff changeset
547 store float %med, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
548 ret void
anatofuz
parents:
diff changeset
549 }
anatofuz
parents:
diff changeset
550
anatofuz
parents:
diff changeset
; Packed-half variant: maxnum against zeroinitializer, then minnum against
; <1.0, 1.0>. Only the GFX9 prefix checks the arithmetic, expecting a single
; v_pk_max_f16 of the loaded value with itself plus the clamp modifier, with
; no other mention of the loaded register between the load and that instruction.
551 ; GCN-LABEL: {{^}}v_clamp_v2f16:
anatofuz
parents:
diff changeset
552 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
553 ; GFX9-NOT: [[A]]
anatofuz
parents:
diff changeset
554 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
anatofuz
parents:
diff changeset
555 define amdgpu_kernel void @v_clamp_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
556 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
557 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
558 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
559 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
anatofuz
parents:
diff changeset
560 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> zeroinitializer)
anatofuz
parents:
diff changeset
561 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
anatofuz
parents:
diff changeset
562
anatofuz
parents:
diff changeset
563 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
564 ret void
anatofuz
parents:
diff changeset
565 }
anatofuz
parents:
diff changeset
566
anatofuz
parents:
diff changeset
; Packed-half clamp whose limit vectors contain undef lanes: <undef, 0.0> for
; the maxnum and <1.0, undef> for the minnum. The GFX9 checks still expect a
; single v_pk_max_f16 of the loaded value with itself plus the clamp modifier.
567 ; GCN-LABEL: {{^}}v_clamp_v2f16_undef_elt:
anatofuz
parents:
diff changeset
568 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
569 ; GFX9-NOT: [[A]]
anatofuz
parents:
diff changeset
570 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
anatofuz
parents:
diff changeset
571 define amdgpu_kernel void @v_clamp_v2f16_undef_elt(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
572 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
573 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
574 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
575 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
anatofuz
parents:
diff changeset
576 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half undef, half 0.0>)
anatofuz
parents:
diff changeset
577 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half undef>)
anatofuz
parents:
diff changeset
578
anatofuz
parents:
diff changeset
579 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
580 ret void
anatofuz
parents:
diff changeset
581 }
anatofuz
parents:
diff changeset
582
anatofuz
parents:
diff changeset
; Lower limit is <2.0, 0.0>, so lane 0 of the maxnum bound is not zero. The
; GFX9 checks expect separate v_pk_max_f16 and v_pk_min_f16 instructions; no
; other prefix has checks for this function.
583 ; GCN-LABEL: {{^}}v_clamp_v2f16_not_zero:
anatofuz
parents:
diff changeset
584 ; GFX9: v_pk_max_f16
anatofuz
parents:
diff changeset
585 ; GFX9: v_pk_min_f16
anatofuz
parents:
diff changeset
586 define amdgpu_kernel void @v_clamp_v2f16_not_zero(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
587 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
588 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
589 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
590 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
anatofuz
parents:
diff changeset
591 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 0.0>)
anatofuz
parents:
diff changeset
592 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
anatofuz
parents:
diff changeset
593
anatofuz
parents:
diff changeset
594 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
595 ret void
anatofuz
parents:
diff changeset
596 }
anatofuz
parents:
diff changeset
597
anatofuz
parents:
diff changeset
; Upper limit is <0.0, 1.0>, so lane 0 of the minnum bound is not one. The
; GFX9 checks expect separate v_pk_max_f16 and v_pk_min_f16 instructions; no
; other prefix has checks for this function.
598 ; GCN-LABEL: {{^}}v_clamp_v2f16_not_one:
anatofuz
parents:
diff changeset
599 ; GFX9: v_pk_max_f16
anatofuz
parents:
diff changeset
600 ; GFX9: v_pk_min_f16
anatofuz
parents:
diff changeset
601 define amdgpu_kernel void @v_clamp_v2f16_not_one(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
602 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
603 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
604 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
605 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
anatofuz
parents:
diff changeset
606 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 0.0, half 0.0>)
anatofuz
parents:
diff changeset
607 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 0.0, half 1.0>)
anatofuz
parents:
diff changeset
608
anatofuz
parents:
diff changeset
609 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
610 ret void
anatofuz
parents:
diff changeset
611 }
anatofuz
parents:
diff changeset
612
anatofuz
parents:
diff changeset
; Clamp of (-0.0 - a) on both lanes. The GFX9 checks expect the negation to
; show up as neg_lo:[1,1] neg_hi:[1,1] source modifiers on the clamped
; v_pk_max_f16, with no other mention of the loaded register before it.
613 ; GCN-LABEL: {{^}}v_clamp_neg_v2f16:
anatofuz
parents:
diff changeset
614 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
615 ; GFX9-NOT: [[A]]
anatofuz
parents:
diff changeset
616 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_lo:[1,1] neg_hi:[1,1] clamp{{$}}
anatofuz
parents:
diff changeset
617 define amdgpu_kernel void @v_clamp_neg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
618 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
619 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
620 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
621 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
anatofuz
parents:
diff changeset
622 %fneg.a = fsub <2 x half> <half -0.0, half -0.0>, %a
anatofuz
parents:
diff changeset
623 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %fneg.a, <2 x half> zeroinitializer)
anatofuz
parents:
diff changeset
624 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
anatofuz
parents:
diff changeset
625
anatofuz
parents:
diff changeset
626 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
627 ret void
anatofuz
parents:
diff changeset
628 }
anatofuz
parents:
diff changeset
629
anatofuz
parents:
diff changeset
; Clamp of (-0.0 - fabs(a)) on both lanes. The GFX9 checks expect a v_and_b32
; with mask 0x7fff7fff on the loaded value, followed by a clamped v_pk_max_f16
; of that result with neg_lo:[1,1] neg_hi:[1,1] modifiers.
630 ; GCN-LABEL: {{^}}v_clamp_negabs_v2f16:
anatofuz
parents:
diff changeset
631 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
632 ; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, [[A]]
anatofuz
parents:
diff changeset
633 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[ABS]], [[ABS]] neg_lo:[1,1] neg_hi:[1,1] clamp{{$}}
anatofuz
parents:
diff changeset
634 define amdgpu_kernel void @v_clamp_negabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
635 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
636 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
637 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
638 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
anatofuz
parents:
diff changeset
639 %fabs.a = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a)
anatofuz
parents:
diff changeset
640 %fneg.fabs.a = fsub <2 x half> <half -0.0, half -0.0>, %fabs.a
anatofuz
parents:
diff changeset
641
anatofuz
parents:
diff changeset
642 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %fneg.fabs.a, <2 x half> zeroinitializer)
anatofuz
parents:
diff changeset
643 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
anatofuz
parents:
diff changeset
644
anatofuz
parents:
diff changeset
645 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
646 ret void
anatofuz
parents:
diff changeset
647 }
anatofuz
parents:
diff changeset
648
anatofuz
parents:
diff changeset
; Only element 0 is negated before the clamp (extractelement, fsub from -0.0,
; insertelement back at index 0). The GFX9 checks expect neg_lo:[1,1] alone on
; the clamped v_pk_max_f16.
649 ; GCN-LABEL: {{^}}v_clamp_neglo_v2f16:
anatofuz
parents:
diff changeset
650 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
651 ; GFX9-NOT: [[A]]
anatofuz
parents:
diff changeset
652 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_lo:[1,1] clamp{{$}}
anatofuz
parents:
diff changeset
653 define amdgpu_kernel void @v_clamp_neglo_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
654 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
655 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
656 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
657 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
anatofuz
parents:
diff changeset
658 %lo = extractelement <2 x half> %a, i32 0
anatofuz
parents:
diff changeset
659 %neg.lo = fsub half -0.0, %lo
anatofuz
parents:
diff changeset
660 %neg.lo.vec = insertelement <2 x half> %a, half %neg.lo, i32 0
anatofuz
parents:
diff changeset
661 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.lo.vec, <2 x half> zeroinitializer)
anatofuz
parents:
diff changeset
662 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
anatofuz
parents:
diff changeset
663
anatofuz
parents:
diff changeset
664 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
665 ret void
anatofuz
parents:
diff changeset
666 }
anatofuz
parents:
diff changeset
667
anatofuz
parents:
diff changeset
; Only element 1 is negated before the clamp (extractelement, fsub from -0.0,
; insertelement back at index 1). The GFX9 checks expect neg_hi:[1,1] alone on
; the clamped v_pk_max_f16.
668 ; GCN-LABEL: {{^}}v_clamp_neghi_v2f16:
anatofuz
parents:
diff changeset
669 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
670 ; GFX9-NOT: [[A]]
anatofuz
parents:
diff changeset
671 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_hi:[1,1] clamp{{$}}
anatofuz
parents:
diff changeset
672 define amdgpu_kernel void @v_clamp_neghi_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
673 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
674 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
675 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
676 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
anatofuz
parents:
diff changeset
677 %hi = extractelement <2 x half> %a, i32 1
anatofuz
parents:
diff changeset
678 %neg.hi = fsub half -0.0, %hi
anatofuz
parents:
diff changeset
679 %neg.hi.vec = insertelement <2 x half> %a, half %neg.hi, i32 1
anatofuz
parents:
diff changeset
680 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.hi.vec, <2 x half> zeroinitializer)
anatofuz
parents:
diff changeset
681 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
anatofuz
parents:
diff changeset
682
anatofuz
parents:
diff changeset
683 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
684 ret void
anatofuz
parents:
diff changeset
685 }
anatofuz
parents:
diff changeset
686
anatofuz
parents:
diff changeset
; The two halves are swapped with a shufflevector (mask <1, 0>) before the
; clamp. The GFX9 checks expect the swap to appear as op_sel:[1,1]
; op_sel_hi:[0,0] on the clamped v_pk_max_f16 of the loaded value.
687 ; GCN-LABEL: {{^}}v_clamp_v2f16_shuffle:
anatofuz
parents:
diff changeset
688 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
689 ; GFX9-NOT: [[A]]
anatofuz
parents:
diff changeset
690 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] op_sel:[1,1] op_sel_hi:[0,0] clamp{{$}}
anatofuz
parents:
diff changeset
691 define amdgpu_kernel void @v_clamp_v2f16_shuffle(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
692 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
693 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
694 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
695 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
anatofuz
parents:
diff changeset
696 %shuf = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0>
anatofuz
parents:
diff changeset
697 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
anatofuz
parents:
diff changeset
698 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
anatofuz
parents:
diff changeset
699
anatofuz
parents:
diff changeset
700 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
701 ret void
anatofuz
parents:
diff changeset
702 }
anatofuz
parents:
diff changeset
703
anatofuz
parents:
diff changeset
; Limit vectors are <0.0, undef> for the maxnum and <undef, 1.0> for the
; minnum, so each lane has one defined bound and one undef bound. The GFX9
; checks expect a single clamped v_pk_max_f16 of the loaded value with itself.
704 ; GCN-LABEL: {{^}}v_clamp_v2f16_undef_limit_elts0:
anatofuz
parents:
diff changeset
705 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
706 ; GFX9-NOT: [[A]]
anatofuz
parents:
diff changeset
707 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
anatofuz
parents:
diff changeset
708 define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
709 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
710 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
711 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
712 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
anatofuz
parents:
diff changeset
713 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 0.0, half undef>)
anatofuz
parents:
diff changeset
714 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half undef, half 1.0>)
anatofuz
parents:
diff changeset
715
anatofuz
parents:
diff changeset
716 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
717 ret void
anatofuz
parents:
diff changeset
718 }
anatofuz
parents:
diff changeset
719
anatofuz
parents:
diff changeset
; Limit vectors are <undef, 0.0> for the maxnum and <1.0, undef> for the
; minnum. The GFX9 checks expect a single clamped v_pk_max_f16 of the loaded
; value with itself.
; NOTE(review): the IR body is identical to @v_clamp_v2f16_undef_elt earlier in
; this file - confirm whether a different undef-lane combination was intended.
720 ; GCN-LABEL: {{^}}v_clamp_v2f16_undef_limit_elts1:
anatofuz
parents:
diff changeset
721 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
722 ; GFX9-NOT: [[A]]
anatofuz
parents:
diff changeset
723 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
anatofuz
parents:
diff changeset
724 define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
anatofuz
parents:
diff changeset
725 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
726 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
anatofuz
parents:
diff changeset
727 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
anatofuz
parents:
diff changeset
728 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
anatofuz
parents:
diff changeset
729 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half undef, half 0.0>)
anatofuz
parents:
diff changeset
730 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half undef>)
anatofuz
parents:
diff changeset
731
anatofuz
parents:
diff changeset
732 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
733 ret void
anatofuz
parents:
diff changeset
734 }
anatofuz
parents:
diff changeset
735
anatofuz
parents:
diff changeset
; maxnum of two different sums (l0 + l1 and l0 + l2), then clamped to
; [0.0, 1.0] through maxnum/minnum; every operation carries the nsz flag.
; Loads use fixed element indices 0, 1 and 2 of %aptr and the result is stored
; to element 3 of %out (no workitem id is used). The checks expect one
; v_max_f32_e64 taking the two add results, with the clamp modifier.
736 ; GCN-LABEL: {{^}}v_clamp_diff_source_f32:
anatofuz
parents:
diff changeset
737 ; GCN: v_add_f32_e32 [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
738 ; GCN: v_add_f32_e32 [[B:v[0-9]+]]
anatofuz
parents:
diff changeset
739 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[B]] clamp{{$}}
anatofuz
parents:
diff changeset
740 define amdgpu_kernel void @v_clamp_diff_source_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0
anatofuz
parents:
diff changeset
741 {
anatofuz
parents:
diff changeset
742 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 0
anatofuz
parents:
diff changeset
743 %gep1 = getelementptr float, float addrspace(1)* %aptr, i32 1
anatofuz
parents:
diff changeset
744 %gep2 = getelementptr float, float addrspace(1)* %aptr, i32 2
anatofuz
parents:
diff changeset
745 %l0 = load float, float addrspace(1)* %gep0
anatofuz
parents:
diff changeset
746 %l1 = load float, float addrspace(1)* %gep1
anatofuz
parents:
diff changeset
747 %l2 = load float, float addrspace(1)* %gep2
anatofuz
parents:
diff changeset
748 %a = fadd nsz float %l0, %l1
anatofuz
parents:
diff changeset
749 %b = fadd nsz float %l0, %l2
anatofuz
parents:
diff changeset
750 %res = call nsz float @llvm.maxnum.f32(float %a, float %b)
anatofuz
parents:
diff changeset
751 %max = call nsz float @llvm.maxnum.f32(float %res, float 0.0)
anatofuz
parents:
diff changeset
752 %min = call nsz float @llvm.minnum.f32(float %max, float 1.0)
anatofuz
parents:
diff changeset
753 %out.gep = getelementptr float, float addrspace(1)* %out, i32 3
anatofuz
parents:
diff changeset
754 store float %min, float addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
755 ret void
anatofuz
parents:
diff changeset
756 }
anatofuz
parents:
diff changeset
757
anatofuz
parents:
diff changeset
; Intrinsic declarations: workitem.id.x, fmed3.f32, and the f32 / f64 / f16 /
; v2f16 variants of fabs, minnum and maxnum. All use attribute group #1.
758 declare i32 @llvm.amdgcn.workitem.id.x() #1
anatofuz
parents:
diff changeset
759 declare float @llvm.fabs.f32(float) #1
anatofuz
parents:
diff changeset
760 declare float @llvm.minnum.f32(float, float) #1
anatofuz
parents:
diff changeset
761 declare float @llvm.maxnum.f32(float, float) #1
anatofuz
parents:
diff changeset
762 declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
anatofuz
parents:
diff changeset
763 declare double @llvm.fabs.f64(double) #1
anatofuz
parents:
diff changeset
764 declare double @llvm.minnum.f64(double, double) #1
anatofuz
parents:
diff changeset
765 declare double @llvm.maxnum.f64(double, double) #1
anatofuz
parents:
diff changeset
766 declare half @llvm.fabs.f16(half) #1
anatofuz
parents:
diff changeset
767 declare half @llvm.minnum.f16(half, half) #1
anatofuz
parents:
diff changeset
768 declare half @llvm.maxnum.f16(half, half) #1
anatofuz
parents:
diff changeset
769 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
anatofuz
parents:
diff changeset
770 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
anatofuz
parents:
diff changeset
771 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
anatofuz
parents:
diff changeset
772
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
; Attribute groups. #0 is used by kernels that do not override the clamp
; settings; #1 is for the intrinsic declarations. #2, #3 and #4 all keep the
; same denormal mode as #0 and differ only in "amdgpu-dx10-clamp" and the
; fp-exceptions target feature:
;   #2 = dx10 clamp off, -fp-exceptions
;   #3 = dx10 clamp on,  +fp-exceptions
;   #4 = dx10 clamp off, +fp-exceptions
773 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
150
anatofuz
parents:
diff changeset
774 attributes #1 = { nounwind readnone }
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
775 attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp-exceptions" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
776 attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "target-features"="+fp-exceptions" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
777 attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="+fp-exceptions" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" }