121
|
1 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-STRICT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,SI %s
|
|
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,SI %s
|
|
3 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-STRICT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-STRICT,SI %s
|
|
4 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-STRICT,SI %s
|
|
5
|
|
6 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-CONTRACT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-CONTRACT,SI %s
|
|
7 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-CONTRACT,SI %s
|
|
8 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-CONTRACT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-CONTRACT,SI %s
|
|
9 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-CONTRACT,SI %s
|
|
10
|
|
11 ; Test all permutations of: fp32 denormals, fast fp contract, fp contract enabled for fmuladd, fmaf fast/slow.
|
|
12
|
|
13 target triple = "amdgcn--"
|
|
14
|
|
15
|
|
16 declare i32 @llvm.amdgcn.workitem.id.x() #1
|
|
17 declare float @llvm.fmuladd.f32(float, float, float) #1
|
|
18 declare half @llvm.fmuladd.f16(half, half, half) #1
|
|
19 declare float @llvm.fabs.f32(float) #1
|
|
20
|
|
; Baseline llvm.fmuladd.f32 test: three plain (non-volatile) loads feed the
; intrinsic and the result is stored to %out. Per the checks below the backend
; is expected to pick v_mac when fp32 denormals are flushed, v_fma when
; denormals are enabled with fast FMA, and a separate v_mul + v_add when
; denormals are enabled with slow FMA.
; GCN-LABEL: {{^}}fmuladd_f32:
; GCN-FLUSH: v_mac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}

; GCN-DENORM-FASTFMA: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+}}

; GCN-DENORM-SLOWFMA: v_mul_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
; GCN-DENORM-SLOWFMA: v_add_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
define amdgpu_kernel void @fmuladd_f32(float addrspace(1)* %out,
                                       float addrspace(1)* %in1,
                                       float addrspace(1)* %in2,
                                       float addrspace(1)* %in3) #0 {
  %lhs = load float, float addrspace(1)* %in1
  %rhs = load float, float addrspace(1)* %in2
  %addend = load float, float addrspace(1)* %in3
  ; lhs * rhs + addend through the intrinsic, which permits (but does not
  ; require) fusing.
  %fused = tail call float @llvm.fmuladd.f32(float %lhs, float %rhs, float %addend)
  store float %fused, float addrspace(1)* %out
  ret void
}
|
|
37
|
|
; Same arithmetic as a multiply-add, but written as an explicit fmul followed
; by an fadd rather than through llvm.fmuladd. Per the checks below: v_mac when
; denormals are flushed; with denormals enabled, v_fma only for the fast-FMA
; run with -fp-contract=fast, and separate v_mul + v_add otherwise.
; GCN-LABEL: {{^}}fmul_fadd_f32:
; GCN-FLUSH: v_mac_f32

; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32

; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32
; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32

; GCN-DENORM-STRICT: v_mul_f32_e32
; GCN-DENORM-STRICT: v_add_f32_e32
define amdgpu_kernel void @fmul_fadd_f32(float addrspace(1)* %out,
                                         float addrspace(1)* %in1,
                                         float addrspace(1)* %in2,
                                         float addrspace(1)* %in3) #0 {
  ; The loads are volatile, so they stay in program order.
  %lhs = load volatile float, float addrspace(1)* %in1
  %rhs = load volatile float, float addrspace(1)* %in2
  %addend = load volatile float, float addrspace(1)* %in3
  %prod = fmul float %lhs, %rhs
  %sum = fadd float %prod, %addend
  store float %sum, float addrspace(1)* %out
  ret void
}
|
|
58
|
|
; llvm.fmuladd.f32 with the constant 2.0 as the first multiplicand:
; fmuladd(2.0, a, b). Per the checks below: v_mac with an inline 2.0 when
; denormals are flushed, v_fma with denormals and fast FMA, and a + a followed
; by an add of b with denormals and slow FMA.
;
; Fixes relative to the previous revision of this test:
;  * the SI-DENORM store check was missing its colon, so FileCheck silently
;    ignored it; it now matches the form used by fmuladd_a_2.0_b_f32.
;  * the label now ends in ':' like the other labels in this file.
;
; NOTE(review): none of the llc invocations visible at the top of this file
; enable the VI-FLUSH / VI-DENORM prefixes, so those two checks are inert.
; GCN-LABEL: {{^}}fmuladd_2.0_a_b_f32:
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],

; GCN-FLUSH: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; SI-FLUSH: buffer_store_dword [[R2]]
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]

; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]

; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]

; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Both inputs are read through %out (elements tid and tid + 1); %in is unused.
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load volatile float, float addrspace(1)* %gep.0
  %r2 = load volatile float, float addrspace(1)* %gep.1

  %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}
|
|
87
|
|
; llvm.fmuladd.f32 with the constant 2.0 as the second multiplicand:
; fmuladd(a, 2.0, b). The expected instructions are the same as for
; fmuladd_2.0_a_b_f32: v_mac with an inline 2.0 when denormals are flushed,
; v_fma with denormals and fast FMA, and a + a followed by an add of b with
; denormals and slow FMA.
;
; The label now ends in ':' so it matches the other labels in this file and
; cannot match a longer symbol that merely starts with this name.
; GCN-LABEL: {{^}}fmuladd_a_2.0_b_f32:
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],

; GCN-FLUSH: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; SI-FLUSH: buffer_store_dword [[R2]]
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]

; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]

; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]

; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Both inputs are read through %out (elements tid and tid + 1); %in is unused.
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load volatile float, float addrspace(1)* %gep.0
  %r2 = load volatile float, float addrspace(1)* %gep.1

  %r3 = tail call float @llvm.fmuladd.f32(float %r1, float 2.0, float %r2)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}
|
|
116
|
|
; (a + a) + b written as two plain fadds, with no fmuladd intrinsic. Per the
; checks below: v_mac with an inline 2.0 when denormals are flushed; with
; denormals enabled, v_fma only for the fast-FMA run with -fp-contract=fast,
; and two v_add instructions for every other denormal configuration.
; GCN-LABEL: {{^}}fadd_a_a_b_f32:
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],

; GCN-FLUSH: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; SI-FLUSH: buffer_store_dword [[R2]]
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]

; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]

; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]

; GCN-DENORM-STRICT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]

; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fadd_a_a_b_f32(float addrspace(1)* %out,
                                          float addrspace(1)* %in1,
                                          float addrspace(1)* %in2) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  ; Inputs come from %out at elements lane and lane + 1; %in1 and %in2 are unused.
  %a.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %a.ptr, i32 1
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane

  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr

  %twice.a = fadd float %a, %a
  %sum = fadd float %twice.a, %b
  store float %sum, float addrspace(1)* %out.ptr
  ret void
}
|
|
151
|
|
; b + (a + a): the same computation as fadd_a_a_b_f32 with the operands of the
; outer fadd commuted. The expected instructions match that test, except that
; the non-fused final v_add takes its operands in the commuted order.
; GCN-LABEL: {{^}}fadd_b_a_a_f32:
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],

; GCN-FLUSH: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; SI-FLUSH: buffer_store_dword [[R2]]
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]

; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]

; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]

; GCN-DENORM-STRICT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]

; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fadd_b_a_a_f32(float addrspace(1)* %out,
                                          float addrspace(1)* %in1,
                                          float addrspace(1)* %in2) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  ; Inputs come from %out at elements lane and lane + 1; %in1 and %in2 are unused.
  %a.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %a.ptr, i32 1
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane

  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr

  %twice.a = fadd float %a, %a
  %sum = fadd float %b, %twice.a
  store float %sum, float addrspace(1)* %out.ptr
  ret void
}
|
|
186
|
|
; fmuladd(-2.0, a, b). Per the checks below: v_mac with an inline -2.0 when
; denormals are flushed, v_fma with -2.0 with denormals and fast FMA, and
; b - (a + a) with denormals and slow FMA.
;
; The label now ends in ':' so it matches the other labels in this file and
; cannot match a longer symbol that merely starts with this name.
;
; NOTE(review): unlike the neighbouring tests, there is no store check for the
; flushed-denormal runs here; left as is because the expected output cannot be
; confirmed from this file alone.
; GCN-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32:
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
; GCN-FLUSH: v_mac_f32_e32 [[R2]], -2.0, [[R1]]

; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]

; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; GCN-DENORM-SLOWFMA: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]

; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Both inputs are read through %out (elements tid and tid + 1); %in is unused.
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load volatile float, float addrspace(1)* %gep.0
  %r2 = load volatile float, float addrspace(1)* %gep.1

  %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1, float %r2)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}
|
|
212
|
|
; fmuladd(-2.0, -a, b): both multiplicands are negated, so the product equals
; 2.0 * a. Per the checks below: the flushed-denormal runs fold both negations
; away (v_mac with +2.0), the fast-FMA denormal runs keep them as a source
; modifier and -2.0 on v_fma, and the slow-FMA denormal runs emit
; b + (a + a).
;
; The label now ends in ':' so it matches the other labels in this file and
; cannot match a longer symbol that merely starts with this name.
; GCN-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32:
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],

; GCN-FLUSH: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; SI-FLUSH: buffer_store_dword [[R2]]
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]

; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], -[[R1]], -2.0, [[R2]]

; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]

; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Both inputs are read through %out (elements tid and tid + 1); %in is unused.
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load volatile float, float addrspace(1)* %gep.0
  %r2 = load volatile float, float addrspace(1)* %gep.1

  ; Negation spelled as a subtraction from -0.0.
  %r1.fneg = fsub float -0.000000e+00, %r1

  %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1.fneg, float %r2)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}
|
|
243
|
|
; fmuladd(2.0, -a, b). Per the checks below: the flushed-denormal runs fold the
; negation into the constant (v_mac with -2.0), the fast-FMA denormal runs keep
; it as a source modifier on v_fma, and the slow-FMA denormal runs emit
; b - (a + a).
; GCN-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32:
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],

; GCN-FLUSH: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
; SI-FLUSH: buffer_store_dword [[R2]]
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]

; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], -[[R1]], 2.0, [[R2]]

; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; GCN-DENORM-SLOWFMA: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]

; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  ; Both inputs are read through %out (elements lane and lane + 1); %in is unused.
  %a.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %a.ptr, i32 1
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane

  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr

  ; Negation spelled as a subtraction from -0.0.
  %a.neg = fsub float -0.000000e+00, %a

  %res = tail call float @llvm.fmuladd.f32(float 2.0, float %a.neg, float %b)
  store float %res, float addrspace(1)* %out.ptr
  ret void
}
|
|
274
|
|
; fmuladd(2.0, a, -b): the addend is negated. Per the checks below: the
; flushed-denormal runs use v_mad with a negated third source (rather than
; v_mac), the fast-FMA denormal runs use v_fma with the same modifier, and the
; slow-FMA denormal runs emit (a + a) - b.
; GCN-LABEL: {{^}}fmuladd_2.0_a_neg_b_f32:
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
; SI-FLUSH: buffer_store_dword [[RESULT]]
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]

; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]

; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; GCN-DENORM-SLOWFMA: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]

; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  ; Both inputs are read through %out (elements lane and lane + 1); %in is unused.
  %a.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %a.ptr, i32 1
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane

  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr

  ; Negation spelled as a subtraction from -0.0.
  %b.neg = fsub float -0.000000e+00, %b

  %res = tail call float @llvm.fmuladd.f32(float 2.0, float %a, float %b.neg)
  store float %res, float addrspace(1)* %out.ptr
  ret void
}
|
|
304
|
|
; a * b - c written as fmul + fsub. Per the checks below: v_mad with a negated
; third source when denormals are flushed; with denormals enabled, v_fma only
; for the fast-FMA run with -fp-contract=fast, and v_mul + v_sub otherwise.
; GCN-LABEL: {{^}}mad_sub_f32:
; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]
; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]

; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]

; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]

; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]

; SI: buffer_store_dword [[RESULT]]
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @mad_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
  %lane = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %idx0 = sext i32 %lane to i64
  ; a, b and c are three consecutive floats of %ptr starting at the lane index.
  %a.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx0
  %idx1 = add i64 %idx0, 1
  %b.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx1
  %idx2 = add i64 %idx0, 2
  %c.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx2
  %out.ptr = getelementptr float, float addrspace(1)* %out, i64 %idx0
  %a = load volatile float, float addrspace(1)* %a.ptr, align 4
  %b = load volatile float, float addrspace(1)* %b.ptr, align 4
  %c = load volatile float, float addrspace(1)* %c.ptr, align 4
  %prod = fmul float %a, %b
  %diff = fsub float %prod, %c
  store float %diff, float addrspace(1)* %out.ptr, align 4
  ret void
}
|
|
338
|
|
; c - a * b written as fmul + fsub (the subtraction operands of mad_sub_f32
; swapped). Per the checks below: v_mad with a negated first source when
; denormals are flushed; with denormals enabled, v_fma only for the fast-FMA
; run with -fp-contract=fast, and v_mul + v_sub otherwise.
; GCN-LABEL: {{^}}mad_sub_inv_f32:
; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]

; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]

; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]

; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]

; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]

; SI: buffer_store_dword [[RESULT]]
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
  %lane = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %idx0 = sext i32 %lane to i64
  ; a, b and c are three consecutive floats of %ptr starting at the lane index.
  %a.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx0
  %idx1 = add i64 %idx0, 1
  %b.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx1
  %idx2 = add i64 %idx0, 2
  %c.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx2
  %out.ptr = getelementptr float, float addrspace(1)* %out, i64 %idx0
  %a = load volatile float, float addrspace(1)* %a.ptr, align 4
  %b = load volatile float, float addrspace(1)* %b.ptr, align 4
  %c = load volatile float, float addrspace(1)* %c.ptr, align 4
  %prod = fmul float %a, %b
  %diff = fsub float %c, %prod
  store float %diff, float addrspace(1)* %out.ptr, align 4
  ret void
}
|
|
373
|
|
; a * b - |c|: like mad_sub_f32 but the subtrahend goes through llvm.fabs.
; Per the checks below the abs is expected to become a source modifier: on the
; third v_mad / v_fma operand when fused, or on a VOP3-encoded (_e64) v_sub
; when the multiply and subtract stay separate.
; GCN-LABEL: {{^}}mad_sub_fabs_f32:
; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]
; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|

; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|

; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e64 [[RESULT:v[0-9]+]], [[TMP]], |[[REGC]]|

; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
; GCN-DENORM-STRICT: v_sub_f32_e64 [[RESULT:v[0-9]+]], [[TMP]], |[[REGC]]|

; SI: buffer_store_dword [[RESULT]]
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @mad_sub_fabs_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
  %lane = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %idx0 = sext i32 %lane to i64
  ; a, b and c are three consecutive floats of %ptr starting at the lane index.
  %a.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx0
  %idx1 = add i64 %idx0, 1
  %b.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx1
  %idx2 = add i64 %idx0, 2
  %c.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx2
  %out.ptr = getelementptr float, float addrspace(1)* %out, i64 %idx0
  %a = load volatile float, float addrspace(1)* %a.ptr, align 4
  %b = load volatile float, float addrspace(1)* %b.ptr, align 4
  %c = load volatile float, float addrspace(1)* %c.ptr, align 4
  %c.mag = call float @llvm.fabs.f32(float %c) #0
  %prod = fmul float %a, %b
  %diff = fsub float %prod, %c.mag
  store float %diff, float addrspace(1)* %out.ptr, align 4
  ret void
}
|
|
408
|
|
; |c| - a * b: mad_sub_fabs_f32 with the subtraction operands swapped. Per the
; checks below the fused forms negate the first source and apply the abs
; modifier to the third; the unfused forms use v_mul followed by a VOP3-encoded
; (_e64) v_sub that carries the abs modifier on its first source.
; GCN-LABEL: {{^}}mad_sub_fabs_inv_f32:
; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]
; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|

; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|

; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e64 [[RESULT:v[0-9]+]], |[[REGC]]|, [[TMP]]

; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
; GCN-DENORM-STRICT: v_sub_f32_e64 [[RESULT:v[0-9]+]], |[[REGC]]|, [[TMP]]

; SI: buffer_store_dword [[RESULT]]
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
  %lane = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %idx0 = sext i32 %lane to i64
  ; a, b and c are three consecutive floats of %ptr starting at the lane index.
  %a.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx0
  %idx1 = add i64 %idx0, 1
  %b.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx1
  %idx2 = add i64 %idx0, 2
  %c.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx2
  %out.ptr = getelementptr float, float addrspace(1)* %out, i64 %idx0
  %a = load volatile float, float addrspace(1)* %a.ptr, align 4
  %b = load volatile float, float addrspace(1)* %b.ptr, align 4
  %c = load volatile float, float addrspace(1)* %c.ptr, align 4
  %c.mag = call float @llvm.fabs.f32(float %c) #0
  %prod = fmul float %a, %b
  %diff = fsub float %c.mag, %prod
  store float %diff, float addrspace(1)* %out.ptr, align 4
  ret void
}
|
|
443
|
|
; (-a) * (-b) + c: both multiplicands are negated before the fmul. Per the
; checks below the two negations are expected to cancel in every
; configuration: v_mac when denormals are flushed, v_fma for the fast-FMA
; denormal run with -fp-contract=fast, and plain v_mul + v_add otherwise, all
; without neg modifiers.
; GCN-LABEL: {{^}}neg_neg_mad_f32:
; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]

; GCN-FLUSH: v_mac_f32_e32 [[REGC]], [[REGA]], [[REGB]]
; SI-FLUSH: buffer_store_dword [[REGC]]
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REGC]]

; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], [[REGC]]

; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]

; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]

; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
  %lane = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %idx0 = sext i32 %lane to i64
  ; a, b and c are three consecutive floats of %ptr starting at the lane index.
  %a.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx0
  %idx1 = add i64 %idx0, 1
  %b.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx1
  %idx2 = add i64 %idx0, 2
  %c.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx2
  %out.ptr = getelementptr float, float addrspace(1)* %out, i64 %idx0
  %a = load volatile float, float addrspace(1)* %a.ptr, align 4
  %b = load volatile float, float addrspace(1)* %b.ptr, align 4
  %c = load volatile float, float addrspace(1)* %c.ptr, align 4
  ; Negations spelled as subtractions from -0.0.
  %a.neg = fsub float -0.000000e+00, %a
  %b.neg = fsub float -0.000000e+00, %b
  %prod = fmul float %a.neg, %b.neg
  %sum = fadd float %prod, %c
  store float %sum, float addrspace(1)* %out.ptr, align 4
  ret void
}
|
|
482
|
|
; a * |b| - c: the abs is applied to a multiplicand instead of the subtrahend.
; Per the checks below the abs becomes a source modifier on the second operand
; of v_mad / v_fma when fused, or on a VOP3-encoded (_e64) v_mul when the
; multiply and subtract stay separate.
; GCN-LABEL: {{^}}mad_fabs_sub_f32:
; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]
; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]

; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]

; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e64 [[TMP:v[0-9]+]], [[REGA]], |[[REGB]]|
; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]

; GCN-DENORM-STRICT: v_mul_f32_e64 [[TMP:v[0-9]+]], [[REGA]], |[[REGB]]|
; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]

; SI: buffer_store_dword [[RESULT]]
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
  %lane = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %idx0 = sext i32 %lane to i64
  ; a, b and c are three consecutive floats of %ptr starting at the lane index.
  %a.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx0
  %idx1 = add i64 %idx0, 1
  %b.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx1
  %idx2 = add i64 %idx0, 2
  %c.ptr = getelementptr float, float addrspace(1)* %ptr, i64 %idx2
  %out.ptr = getelementptr float, float addrspace(1)* %out, i64 %idx0
  %a = load volatile float, float addrspace(1)* %a.ptr, align 4
  %b = load volatile float, float addrspace(1)* %b.ptr, align 4
  %c = load volatile float, float addrspace(1)* %c.ptr, align 4
  %b.mag = call float @llvm.fabs.f32(float %b) #0
  %prod = fmul float %a, %b.mag
  %diff = fsub float %prod, %c
  store float %diff, float addrspace(1)* %out.ptr, align 4
  ret void
}
|
|
517
|
|
; b - (a + a) written as fadd + fsub, with no fmuladd intrinsic. Per the checks
; below: v_mac with an inline -2.0 when denormals are flushed; with denormals
; enabled, v_fma with -2.0 only for the fast-FMA run with -fp-contract=fast,
; and v_add + v_sub otherwise.
; GCN-LABEL: {{^}}fsub_c_fadd_a_a_f32:
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
; GCN-FLUSH: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
; SI-FLUSH: buffer_store_dword [[R2]]
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]

; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]

; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]

; GCN-DENORM-STRICT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]

; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fsub_c_fadd_a_a_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  ; Both inputs are read through %out (elements lane and lane + 1); %in is unused.
  %a.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %a.ptr, i32 1
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane

  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr

  %twice.a = fadd float %a, %a
  %diff = fsub float %b, %twice.a

  store float %diff, float addrspace(1)* %out.ptr
  ret void
}
|
|
550
|
|
; (a + a) - b written as fadd + fsub. Per the checks below: v_mad with an
; inline 2.0 and a negated third source when denormals are flushed; with
; denormals enabled, v_fma of the same shape only for the fast-FMA run with
; -fp-contract=fast, and v_add + v_sub otherwise.
; GCN-LABEL: {{^}}fsub_fadd_a_a_c_f32:
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]

; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]

; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]

; GCN-DENORM-STRICT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]

; SI: buffer_store_dword [[RESULT]]
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fsub_fadd_a_a_c_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  ; Both inputs are read through %out (elements lane and lane + 1); %in is unused.
  %a.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %a.ptr, i32 1
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane

  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr

  %twice.a = fadd float %a, %a
  %diff = fsub float %twice.a, %b

  store float %diff, float addrspace(1)* %out.ptr
  ret void
}
|
|
581
|
|
582 attributes #0 = { nounwind }
|
|
583 attributes #1 = { nounwind readnone }
|