annotate test/CodeGen/AMDGPU/fmuladd.f32.ll @ 128:c347d3398279 default tip

fix
author mir3636
date Wed, 06 Dec 2017 14:37:17 +0900
parents 803732b1fca8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
121
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
1 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-STRICT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,SI %s
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,SI %s
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
3 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-STRICT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-STRICT,SI %s
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
4 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-STRICT,SI %s
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
5
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
6 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-CONTRACT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-CONTRACT,SI %s
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
7 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-CONTRACT,SI %s
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
8 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-CONTRACT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-CONTRACT,SI %s
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
9 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-CONTRACT,SI %s
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
10
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
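11 ; Test all permutations of: fp32 denormals, fast fp contract, fp contract enabled for fmuladd, fmaf fast/slow. The VI-prefixed check lines below are not enabled by any RUN line above, so only the GCN-* and SI-* checks are exercised.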
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
12
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
13 target triple = "amdgcn--"
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
14
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
15
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
16 declare i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
17 declare float @llvm.fmuladd.f32(float, float, float) #1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
18 declare half @llvm.fmuladd.f16(half, half, half) #1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
19 declare float @llvm.fabs.f32(float) #1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
20
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
21 ; GCN-LABEL: {{^}}fmuladd_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
22 ; GCN-FLUSH: v_mac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
23
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
24 ; GCN-DENORM-FASTFMA: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
25
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
26 ; GCN-DENORM-SLOWFMA: v_mul_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
27 ; GCN-DENORM-SLOWFMA: v_add_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
28 define amdgpu_kernel void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
29 float addrspace(1)* %in2, float addrspace(1)* %in3) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
30 %r0 = load float, float addrspace(1)* %in1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
31 %r1 = load float, float addrspace(1)* %in2
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
32 %r2 = load float, float addrspace(1)* %in3
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
33 %r3 = tail call float @llvm.fmuladd.f32(float %r0, float %r1, float %r2)
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
34 store float %r3, float addrspace(1)* %out
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
35 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
36 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
37
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
38 ; GCN-LABEL: {{^}}fmul_fadd_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
39 ; GCN-FLUSH: v_mac_f32
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
40
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
41 ; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
42
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
43 ; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
44 ; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
45
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
46 ; GCN-DENORM-STRICT: v_mul_f32_e32
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
47 ; GCN-DENORM-STRICT: v_add_f32_e32
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
48 define amdgpu_kernel void @fmul_fadd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
49 float addrspace(1)* %in2, float addrspace(1)* %in3) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
50 %r0 = load volatile float, float addrspace(1)* %in1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
51 %r1 = load volatile float, float addrspace(1)* %in2
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
52 %r2 = load volatile float, float addrspace(1)* %in3
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
53 %mul = fmul float %r0, %r1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
54 %add = fadd float %mul, %r2
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
55 store float %add, float addrspace(1)* %out
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
56 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
57 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
58
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
59 ; GCN-LABEL: {{^}}fmuladd_2.0_a_b_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
60 ; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
61 ; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
62
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
63 ; GCN-FLUSH: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
64 ; SI-FLUSH: buffer_store_dword [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
65 ; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
66
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
67 ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
68
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
69 ; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
70 ; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
71
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
72 ; SI-DENORM: buffer_store_dword [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
73 ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
74 define amdgpu_kernel void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
75 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
76 %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
77 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
78 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
79
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
80 %r1 = load volatile float, float addrspace(1)* %gep.0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
81 %r2 = load volatile float, float addrspace(1)* %gep.1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
82
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
83 %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2)
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
84 store float %r3, float addrspace(1)* %gep.out
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
85 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
86 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
87
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
88 ; GCN-LABEL: {{^}}fmuladd_a_2.0_b_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
89 ; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
90 ; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
91
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
92 ; GCN-FLUSH: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
93 ; SI-FLUSH: buffer_store_dword [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
94 ; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
95
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
96 ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
97
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
98 ; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
99 ; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
100
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
101 ; SI-DENORM: buffer_store_dword [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
102 ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
103 define amdgpu_kernel void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
104 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
105 %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
106 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
107 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
108
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
109 %r1 = load volatile float, float addrspace(1)* %gep.0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
110 %r2 = load volatile float, float addrspace(1)* %gep.1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
111
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
112 %r3 = tail call float @llvm.fmuladd.f32(float %r1, float 2.0, float %r2)
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
113 store float %r3, float addrspace(1)* %gep.out
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
114 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
115 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
116
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
117 ; GCN-LABEL: {{^}}fadd_a_a_b_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
118 ; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
119 ; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
120
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
121 ; GCN-FLUSH: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
122 ; SI-FLUSH: buffer_store_dword [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
123 ; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
124
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
125 ; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
126
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
127 ; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
128 ; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
129
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
130 ; GCN-DENORM-STRICT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
131 ; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
132
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
133 ; SI-DENORM: buffer_store_dword [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
134 ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
135 define amdgpu_kernel void @fadd_a_a_b_f32(float addrspace(1)* %out,
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
136 float addrspace(1)* %in1,
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
137 float addrspace(1)* %in2) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
138 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
139 %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
140 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
141 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
142
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
143 %r0 = load volatile float, float addrspace(1)* %gep.0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
144 %r1 = load volatile float, float addrspace(1)* %gep.1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
145
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
146 %add.0 = fadd float %r0, %r0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
147 %add.1 = fadd float %add.0, %r1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
148 store float %add.1, float addrspace(1)* %gep.out
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
149 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
150 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
151
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
152 ; GCN-LABEL: {{^}}fadd_b_a_a_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
153 ; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
154 ; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
155
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
156 ; GCN-FLUSH: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
157 ; SI-FLUSH: buffer_store_dword [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
158 ; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
159
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
160 ; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
161
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
162 ; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
163 ; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
164
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
165 ; GCN-DENORM-STRICT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
166 ; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
167
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
168 ; SI-DENORM: buffer_store_dword [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
169 ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
170 define amdgpu_kernel void @fadd_b_a_a_f32(float addrspace(1)* %out,
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
171 float addrspace(1)* %in1,
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
172 float addrspace(1)* %in2) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
173 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
174 %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
175 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
176 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
177
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
178 %r0 = load volatile float, float addrspace(1)* %gep.0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
179 %r1 = load volatile float, float addrspace(1)* %gep.1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
180
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
181 %add.0 = fadd float %r0, %r0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
182 %add.1 = fadd float %r1, %add.0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
183 store float %add.1, float addrspace(1)* %gep.out
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
184 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
185 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
186
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
187 ; GCN-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
188 ; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
189 ; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
190 ; GCN-FLUSH: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
191
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
192 ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
193
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
194 ; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
195 ; GCN-DENORM-SLOWFMA: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
196
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
197 ; SI-DENORM: buffer_store_dword [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
198 ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
199 define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
200 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
201 %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
202 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
203 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
204
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
205 %r1 = load volatile float, float addrspace(1)* %gep.0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
206 %r2 = load volatile float, float addrspace(1)* %gep.1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
207
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
208 %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1, float %r2)
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
209 store float %r3, float addrspace(1)* %gep.out
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
210 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
211 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
212
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
213 ; GCN-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
214 ; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
215 ; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
216
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
217 ; GCN-FLUSH: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
218 ; SI-FLUSH: buffer_store_dword [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
219 ; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
220
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
221 ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], -[[R1]], -2.0, [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
222
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
223 ; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
224 ; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
225
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
226 ; SI-DENORM: buffer_store_dword [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
227 ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
228 define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
229 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
230 %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
231 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
232 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
233
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
234 %r1 = load volatile float, float addrspace(1)* %gep.0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
235 %r2 = load volatile float, float addrspace(1)* %gep.1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
236
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
237 %r1.fneg = fsub float -0.000000e+00, %r1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
238
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
239 %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1.fneg, float %r2)
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
240 store float %r3, float addrspace(1)* %gep.out
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
241 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
242 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
243
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
244 ; GCN-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
245 ; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
246 ; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
247
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
248 ; GCN-FLUSH: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
249 ; SI-FLUSH: buffer_store_dword [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
250 ; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
251
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
252 ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], -[[R1]], 2.0, [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
253
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
254 ; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
255 ; GCN-DENORM-SLOWFMA: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
256
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
257 ; SI-DENORM: buffer_store_dword [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
258 ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
259 define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
260 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
261 %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
262 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
263 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
264
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
265 %r1 = load volatile float, float addrspace(1)* %gep.0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
266 %r2 = load volatile float, float addrspace(1)* %gep.1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
267
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
268 %r1.fneg = fsub float -0.000000e+00, %r1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
269
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
270 %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1.fneg, float %r2)
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
271 store float %r3, float addrspace(1)* %gep.out
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
272 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
273 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
274
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
275 ; GCN-LABEL: {{^}}fmuladd_2.0_a_neg_b_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
276 ; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
277 ; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
278 ; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
279 ; SI-FLUSH: buffer_store_dword [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
280 ; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
281
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
282 ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
283
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
284 ; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
285 ; GCN-DENORM-SLOWFMA: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
286
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
287 ; SI-DENORM: buffer_store_dword [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
288 ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
289 define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
290 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
291 %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
292 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
293 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
294
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
295 %r1 = load volatile float, float addrspace(1)* %gep.0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
296 %r2 = load volatile float, float addrspace(1)* %gep.1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
297
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
298 %r2.fneg = fsub float -0.000000e+00, %r2
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
299
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
300 %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2.fneg)
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
301 store float %r3, float addrspace(1)* %gep.out
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
302 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
303 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
304
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
305 ; GCN-LABEL: {{^}}mad_sub_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
306 ; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
307 ; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
308 ; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
309 ; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
310
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
311 ; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
312
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
313 ; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
314 ; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
315
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
316 ; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
317 ; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
318
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
319 ; SI: buffer_store_dword [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
320 ; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; Kernel under test: out[tid] = a * b - c, written as a separate fmul and fsub.
;   %out - destination float buffer in addrspace(1), indexed by the work-item id
;   %ptr - source float buffer in addrspace(1); a, b and c are read from
;          elements tid, tid + 1 and tid + 2
; All three inputs are loaded with volatile loads.
321 define amdgpu_kernel void @mad_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
322 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; the work-item id is sign-extended so it can be used as a 64-bit element index
323 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
324 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
325 %add1 = add i64 %tid.ext, 1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
326 %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
327 %add2 = add i64 %tid.ext, 2
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
328 %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
329 %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
330 %a = load volatile float, float addrspace(1)* %gep0, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
331 %b = load volatile float, float addrspace(1)* %gep1, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
332 %c = load volatile float, float addrspace(1)* %gep2, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
333 %mul = fmul float %a, %b
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; product minus c: the addend is the subtracted operand
334 %sub = fsub float %mul, %c
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
335 store float %sub, float addrspace(1)* %outgep, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
336 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
337 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
338
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
339 ; GCN-LABEL: {{^}}mad_sub_inv_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
340 ; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
341 ; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
342 ; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
343
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
344 ; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
345
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
346 ; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
347
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
348 ; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
349 ; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
350
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
351 ; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
352 ; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
353
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
354 ; SI: buffer_store_dword [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
355 ; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; Kernel under test: out[tid] = c - a * b, written as a separate fmul and fsub
; with the product as the subtracted operand.
;   %out - destination float buffer in addrspace(1), indexed by the work-item id
;   %ptr - source float buffer in addrspace(1); a, b and c are read from
;          elements tid, tid + 1 and tid + 2
; All three inputs are loaded with volatile loads.
356 define amdgpu_kernel void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
357 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
358 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
359 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
360 %add1 = add i64 %tid.ext, 1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
361 %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
362 %add2 = add i64 %tid.ext, 2
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
363 %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
364 %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
365 %a = load volatile float, float addrspace(1)* %gep0, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
366 %b = load volatile float, float addrspace(1)* %gep1, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
367 %c = load volatile float, float addrspace(1)* %gep2, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
368 %mul = fmul float %a, %b
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; c minus product: operand order is reversed relative to mad_sub_f32
369 %sub = fsub float %c, %mul
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
370 store float %sub, float addrspace(1)* %outgep, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
371 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
372 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
373
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
374 ; GCN-LABEL: {{^}}mad_sub_fabs_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
375 ; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
376 ; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
377 ; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
378 ; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
379
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
380 ; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
381
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
382 ; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
383 ; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e64 [[RESULT:v[0-9]+]], [[TMP]], |[[REGC]]|
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
384
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
385 ; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
386 ; GCN-DENORM-STRICT: v_sub_f32_e64 [[RESULT:v[0-9]+]], [[TMP]], |[[REGC]]|
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
387
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
388 ; SI: buffer_store_dword [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
389 ; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; Kernel under test: out[tid] = a * b - |c|, where |c| comes from a call to
; @llvm.fabs.f32 and the multiply and subtract are separate instructions.
;   %out - destination float buffer in addrspace(1), indexed by the work-item id
;   %ptr - source float buffer in addrspace(1); a, b and c are read from
;          elements tid, tid + 1 and tid + 2
; All three inputs are loaded with volatile loads.
390 define amdgpu_kernel void @mad_sub_fabs_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
391 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
392 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
393 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
394 %add1 = add i64 %tid.ext, 1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
395 %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
396 %add2 = add i64 %tid.ext, 2
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
397 %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
398 %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
399 %a = load volatile float, float addrspace(1)* %gep0, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
400 %b = load volatile float, float addrspace(1)* %gep1, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
401 %c = load volatile float, float addrspace(1)* %gep2, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; absolute value is applied to the addend only
402 %c.abs = call float @llvm.fabs.f32(float %c) #0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
403 %mul = fmul float %a, %b
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
404 %sub = fsub float %mul, %c.abs
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
405 store float %sub, float addrspace(1)* %outgep, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
406 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
407 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
408
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
409 ; GCN-LABEL: {{^}}mad_sub_fabs_inv_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
410 ; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
411 ; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
412 ; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
413 ; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
414
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
415 ; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
416
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
417 ; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
418 ; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e64 [[RESULT:v[0-9]+]], |[[REGC]]|, [[TMP]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
419
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
420 ; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
421 ; GCN-DENORM-STRICT: v_sub_f32_e64 [[RESULT:v[0-9]+]], |[[REGC]]|, [[TMP]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
422
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
423 ; SI: buffer_store_dword [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
424 ; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; Kernel under test: out[tid] = |c| - a * b, where |c| comes from a call to
; @llvm.fabs.f32 and the product is the subtracted operand.
;   %out - destination float buffer in addrspace(1), indexed by the work-item id
;   %ptr - source float buffer in addrspace(1); a, b and c are read from
;          elements tid, tid + 1 and tid + 2
; All three inputs are loaded with volatile loads.
425 define amdgpu_kernel void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
426 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
427 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
428 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
429 %add1 = add i64 %tid.ext, 1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
430 %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
431 %add2 = add i64 %tid.ext, 2
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
432 %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
433 %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
434 %a = load volatile float, float addrspace(1)* %gep0, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
435 %b = load volatile float, float addrspace(1)* %gep1, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
436 %c = load volatile float, float addrspace(1)* %gep2, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
437 %c.abs = call float @llvm.fabs.f32(float %c) #0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
438 %mul = fmul float %a, %b
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; |c| minus product: operand order is reversed relative to mad_sub_fabs_f32
439 %sub = fsub float %c.abs, %mul
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
440 store float %sub, float addrspace(1)* %outgep, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
441 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
442 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
443
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
444 ; GCN-LABEL: {{^}}neg_neg_mad_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
445 ; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
446 ; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
447 ; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
448
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
449 ; GCN-FLUSH: v_mac_f32_e32 [[REGC]], [[REGA]], [[REGB]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
450 ; SI-FLUSH: buffer_store_dword [[REGC]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
451 ; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REGC]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
452
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
453 ; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], [[REGC]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
454
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
455 ; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
456 ; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
457
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
458 ; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
459 ; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
460
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
461 ; SI-DENORM: buffer_store_dword [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
462 ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; Kernel under test: out[tid] = (-a) * (-b) + c. Both multiplicands are negated
; by subtracting them from -0.0 before the fmul; the addend is used as loaded.
;   %out - destination float buffer in addrspace(1), indexed by the work-item id
;   %ptr - source float buffer in addrspace(1); a, b and c are read from
;          elements tid, tid + 1 and tid + 2
; All three inputs are loaded with volatile loads.
463 define amdgpu_kernel void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
464 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
465 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
466 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
467 %add1 = add i64 %tid.ext, 1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
468 %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
469 %add2 = add i64 %tid.ext, 2
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
470 %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
471 %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
472 %a = load volatile float, float addrspace(1)* %gep0, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
473 %b = load volatile float, float addrspace(1)* %gep1, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
474 %c = load volatile float, float addrspace(1)* %gep2, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; negation is expressed as (-0.0 - x) for each multiplicand
475 %nega = fsub float -0.000000e+00, %a
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
476 %negb = fsub float -0.000000e+00, %b
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
477 %mul = fmul float %nega, %negb
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; NOTE(review): the value is named %sub but is produced by an fadd
478 %sub = fadd float %mul, %c
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
479 store float %sub, float addrspace(1)* %outgep, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
480 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
481 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
482
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
483 ; GCN-LABEL: {{^}}mad_fabs_sub_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
484 ; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
485 ; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
486 ; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
487 ; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
488
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
489 ; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
490
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
491 ; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e64 [[TMP:v[0-9]+]], [[REGA]], |[[REGB]]|
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
492 ; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
493
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
494 ; GCN-DENORM-STRICT: v_mul_f32_e64 [[TMP:v[0-9]+]], [[REGA]], |[[REGB]]|
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
495 ; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
496
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
497 ; SI: buffer_store_dword [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
498 ; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; Kernel under test: out[tid] = a * |b| - c, where |b| comes from a call to
; @llvm.fabs.f32 and the multiply and subtract are separate instructions.
;   %out - destination float buffer in addrspace(1), indexed by the work-item id
;   %ptr - source float buffer in addrspace(1); a, b and c are read from
;          elements tid, tid + 1 and tid + 2
; All three inputs are loaded with volatile loads.
499 define amdgpu_kernel void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
500 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
501 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
502 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
503 %add1 = add i64 %tid.ext, 1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
504 %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
505 %add2 = add i64 %tid.ext, 2
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
506 %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
507 %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
508 %a = load volatile float, float addrspace(1)* %gep0, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
509 %b = load volatile float, float addrspace(1)* %gep1, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
510 %c = load volatile float, float addrspace(1)* %gep2, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; absolute value is applied to the second multiplicand, not to the addend
511 %b.abs = call float @llvm.fabs.f32(float %b) #0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
512 %mul = fmul float %a, %b.abs
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
513 %sub = fsub float %mul, %c
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
514 store float %sub, float addrspace(1)* %outgep, align 4
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
515 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
516 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
517
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
518 ; GCN-LABEL: {{^}}fsub_c_fadd_a_a_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
519 ; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
520 ; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
521 ; GCN-FLUSH: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
522 ; SI-FLUSH: buffer_store_dword [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
523 ; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
524
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
525 ; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
526
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
527 ; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
528 ; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
529
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
530 ; GCN-DENORM-STRICT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
531 ; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
532
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
533 ; SI-DENORM: buffer_store_dword [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
534 ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; Kernel under test: out[tid] = r2 - (r1 + r1), with the doubling written as
; an fadd of r1 with itself rather than a multiply by 2.0.
;   %out - float buffer in addrspace(1); r1 is read from element tid, r2 from
;          element tid + 1, and the result is written back to element tid
;   %in  - NOTE(review): not referenced anywhere in the body; both inputs are
;          read from %out. Confirm this is intentional before changing it.
; Both inputs are loaded with volatile loads.
535 define amdgpu_kernel void @fsub_c_fadd_a_a_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
536 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
537 %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
538 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; same base and index as %gep.0
539 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
540
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
541 %r1 = load volatile float, float addrspace(1)* %gep.0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
542 %r2 = load volatile float, float addrspace(1)* %gep.1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
543
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
544 %add = fadd float %r1, %r1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; the doubled value is the subtracted operand
545 %r3 = fsub float %r2, %add
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
546
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
547 store float %r3, float addrspace(1)* %gep.out
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
548 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
549 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
550
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
551 ; GCN-LABEL: {{^}}fsub_fadd_a_a_c_f32:
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
552 ; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
553 ; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
554 ; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
555
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
556 ; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
557
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
558 ; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
559 ; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
560
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
561 ; GCN-DENORM-STRICT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
562 ; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
563
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
564 ; SI: buffer_store_dword [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
565 ; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; Kernel under test: out[tid] = (r1 + r1) - r2, with the doubling written as
; an fadd of r1 with itself rather than a multiply by 2.0.
;   %out - float buffer in addrspace(1); r1 is read from element tid, r2 from
;          element tid + 1, and the result is written back to element tid
;   %in  - NOTE(review): not referenced anywhere in the body; both inputs are
;          read from %out. Confirm this is intentional before changing it.
; Both inputs are loaded with volatile loads.
566 define amdgpu_kernel void @fsub_fadd_a_a_c_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
567 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
568 %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
569 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; same base and index as %gep.0
570 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
571
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
572 %r1 = load volatile float, float addrspace(1)* %gep.0
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
573 %r2 = load volatile float, float addrspace(1)* %gep.1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
574
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
575 %add = fadd float %r1, %r1
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
; r2 is subtracted from the doubled value
576 %r3 = fsub float %add, %r2
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
577
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
578 store float %r3, float addrspace(1)* %gep.out
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
579 ret void
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
580 }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
581
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
582 attributes #0 = { nounwind }
803732b1fca8 LLVM 5.0
kono
parents:
diff changeset
583 attributes #1 = { nounwind readnone }