comparison test/CodeGen/AMDGPU/fdiv.ll @ 120:1172e4bd9c6f

update 4.0.0
author mir3636
date Fri, 25 Nov 2016 19:14:25 +0900
parents afa8332a0e37
children 803732b1fca8
comparison
equal deleted inserted replaced
101:34baf5011add 120:1172e4bd9c6f
1 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s 1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s 2 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
3 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
4 3
5 ; These tests check that fdiv is expanded correctly and also test that the 4 ; These tests check that fdiv is expanded correctly and also test that the
6 ; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate 5 ; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate
7 ; instruction groups. 6 ; instruction groups.
8 7
8 ; These tests check fdiv using unsafe_fp_math, coarse fp div, and IEEE754 fp div.
9
9 ; FUNC-LABEL: {{^}}fdiv_f32: 10 ; FUNC-LABEL: {{^}}fdiv_f32:
10 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z 11 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
11 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y 12 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
12 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS 13
13 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS 14 ; SI: v_div_scale_f32
15 ; SI-DAG: v_div_scale_f32
14 16
15 ; SI-DAG: v_rcp_f32 17 ; SI-DAG: v_rcp_f32
16 ; SI-DAG: v_mul_f32 18 ; SI: v_fma_f32
17 define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) { 19 ; SI: v_fma_f32
18 entry: 20 ; SI: v_mul_f32
19 %0 = fdiv float %a, %b 21 ; SI: v_fma_f32
20 store float %0, float addrspace(1)* %out 22 ; SI: v_fma_f32
21 ret void 23 ; SI: v_fma_f32
22 } 24 ; SI: v_div_fmas_f32
23 25 ; SI: v_div_fixup_f32
24 26 define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 {
27 entry:
28 %fdiv = fdiv float %a, %b
29 store float %fdiv, float addrspace(1)* %out
30 ret void
31 }
32
33 ; FUNC-LABEL: {{^}}fdiv_25ulp_f32:
34 ; SI: v_cndmask_b32
35 ; SI: v_mul_f32
36 ; SI: v_rcp_f32
37 ; SI: v_mul_f32
38 ; SI: v_mul_f32
39 define void @fdiv_25ulp_f32(float addrspace(1)* %out, float %a, float %b) #0 {
40 entry:
41 %fdiv = fdiv float %a, %b, !fpmath !0
42 store float %fdiv, float addrspace(1)* %out
43 ret void
44 }
45
46 ; With fp32 denormals enabled (#2), expect the full fdiv expansion despite the 2.5 ulp !fpmath hint.
47 ; FUNC-LABEL: {{^}}fdiv_25ulp_denormals_f32:
48 ; SI: v_fma_f32
49 ; SI: v_div_fmas_f32
50 ; SI: v_div_fixup_f32
51 define void @fdiv_25ulp_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
52 entry:
53 %fdiv = fdiv float %a, %b, !fpmath !0
54 store float %fdiv, float addrspace(1)* %out
55 ret void
56 }
57
58 ; FUNC-LABEL: {{^}}fdiv_fast_denormals_f32:
59 ; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
60 ; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
61 ; SI-NOT: [[RESULT]]
62 ; SI: buffer_store_dword [[RESULT]]
63 define void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
64 entry:
65 %fdiv = fdiv fast float %a, %b
66 store float %fdiv, float addrspace(1)* %out
67 ret void
68 }
69
70 ; FUNC-LABEL: {{^}}fdiv_f32_fast_math:
71 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
72 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
73
74 ; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
75 ; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
76 ; SI-NOT: [[RESULT]]
77 ; SI: buffer_store_dword [[RESULT]]
78 define void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 {
79 entry:
80 %fdiv = fdiv fast float %a, %b
81 store float %fdiv, float addrspace(1)* %out
82 ret void
83 }
84
85 ; FUNC-LABEL: {{^}}fdiv_f32_arcp_math:
86 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
87 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
88
89 ; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
90 ; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
91 ; SI-NOT: [[RESULT]]
92 ; SI: buffer_store_dword [[RESULT]]
93 define void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) #0 {
94 entry:
95 %fdiv = fdiv arcp float %a, %b
96 store float %fdiv, float addrspace(1)* %out
97 ret void
98 }
25 99
26 ; FUNC-LABEL: {{^}}fdiv_v2f32: 100 ; FUNC-LABEL: {{^}}fdiv_v2f32:
27 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z 101 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
28 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y 102 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
29 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS 103 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
30 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS 104 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
31 105
32 ; SI-DAG: v_rcp_f32 106 ; SI: v_div_scale_f32
33 ; SI-DAG: v_mul_f32 107 ; SI: v_div_scale_f32
34 ; SI-DAG: v_rcp_f32 108 ; SI: v_div_scale_f32
35 ; SI-DAG: v_mul_f32 109 ; SI: v_div_scale_f32
36 define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { 110 define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
37 entry: 111 entry:
38 %0 = fdiv <2 x float> %a, %b 112 %fdiv = fdiv <2 x float> %a, %b
39 store <2 x float> %0, <2 x float> addrspace(1)* %out 113 store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
114 ret void
115 }
116
117 ; FUNC-LABEL: {{^}}fdiv_ulp25_v2f32:
118 ; SI: v_cmp_gt_f32
119 ; SI: v_cmp_gt_f32
120 define void @fdiv_ulp25_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
121 entry:
122 %fdiv = fdiv arcp <2 x float> %a, %b, !fpmath !0
123 store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
124 ret void
125 }
126
127 ; FUNC-LABEL: {{^}}fdiv_v2f32_fast_math:
128 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
129 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
130 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
131 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
132
133 ; SI: v_rcp_f32
134 ; SI: v_rcp_f32
135 define void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
136 entry:
137 %fdiv = fdiv fast <2 x float> %a, %b
138 store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
139 ret void
140 }
141
142 ; FUNC-LABEL: {{^}}fdiv_v2f32_arcp_math:
143 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
144 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
145 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
146 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
147
148 ; SI: v_rcp_f32
149 ; SI: v_rcp_f32
150 define void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
151 entry:
152 %fdiv = fdiv arcp <2 x float> %a, %b
153 store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
40 ret void 154 ret void
41 } 155 }
42 156
43 ; FUNC-LABEL: {{^}}fdiv_v4f32: 157 ; FUNC-LABEL: {{^}}fdiv_v4f32:
44 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} 158 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
48 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS 162 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
49 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS 163 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
50 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS 164 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
51 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS 165 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
52 166
53 ; SI-DAG: v_rcp_f32 167 ; SI: v_div_fixup_f32
54 ; SI-DAG: v_mul_f32 168 ; SI: v_div_fixup_f32
55 ; SI-DAG: v_rcp_f32 169 ; SI: v_div_fixup_f32
56 ; SI-DAG: v_mul_f32 170 ; SI: v_div_fixup_f32
57 ; SI-DAG: v_rcp_f32 171 define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
58 ; SI-DAG: v_mul_f32
59 ; SI-DAG: v_rcp_f32
60 ; SI-DAG: v_mul_f32
61 define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
62 %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 172 %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
63 %a = load <4 x float>, <4 x float> addrspace(1) * %in 173 %a = load <4 x float>, <4 x float> addrspace(1) * %in
64 %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr 174 %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
65 %result = fdiv <4 x float> %a, %b 175 %result = fdiv <4 x float> %a, %b
66 store <4 x float> %result, <4 x float> addrspace(1)* %out 176 store <4 x float> %result, <4 x float> addrspace(1)* %out
67 ret void 177 ret void
68 } 178 }
179
180 ; FUNC-LABEL: {{^}}fdiv_v4f32_fast_math:
181 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
182 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
183 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
184 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
185 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
186 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
187 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
188 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
189
190 ; SI: v_rcp_f32
191 ; SI: v_rcp_f32
192 ; SI: v_rcp_f32
193 ; SI: v_rcp_f32
194 define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
195 %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
196 %a = load <4 x float>, <4 x float> addrspace(1) * %in
197 %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
198 %result = fdiv fast <4 x float> %a, %b
199 store <4 x float> %result, <4 x float> addrspace(1)* %out
200 ret void
201 }
202
203 ; FUNC-LABEL: {{^}}fdiv_v4f32_arcp_math:
204 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
205 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
206 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
207 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
208 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
209 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
210 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
211 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
212
213 ; SI: v_rcp_f32
214 ; SI: v_rcp_f32
215 ; SI: v_rcp_f32
216 ; SI: v_rcp_f32
217 define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
218 %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
219 %a = load <4 x float>, <4 x float> addrspace(1) * %in
220 %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
221 %result = fdiv arcp <4 x float> %a, %b
222 store <4 x float> %result, <4 x float> addrspace(1)* %out
223 ret void
224 }
225
226 attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="-fp32-denormals" }
227 attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "target-features"="-fp32-denormals" }
228 attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="+fp32-denormals" }
229
230 !0 = !{float 2.500000e+00}