Mercurial > hg > Members > tobaru > cbc > CbC_llvm
comparison test/CodeGen/AMDGPU/fdiv.ll @ 120:1172e4bd9c6f
update 4.0.0
author | mir3636 |
---|---|
date | Fri, 25 Nov 2016 19:14:25 +0900 |
parents | afa8332a0e37 |
children | 803732b1fca8 |
comparison
equal
deleted
inserted
replaced
101:34baf5011add | 120:1172e4bd9c6f |
---|---|
1 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s | 1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s |
2 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s | 2 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s |
3 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s | |
4 | 3 |
5 ; These tests check that fdiv is expanded correctly and also test that the | 4 ; These tests check that fdiv is expanded correctly and also test that the |
6 ; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate | 5 ; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate |
7 ; instruction groups. | 6 ; instruction groups. |
8 | 7 |
8 ; These tests check fdiv using unsafe_fp_math, coarse fp div, and IEEE754 fp div. | |
9 | |
9 ; FUNC-LABEL: {{^}}fdiv_f32: | 10 ; FUNC-LABEL: {{^}}fdiv_f32: |
10 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z | 11 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W |
11 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y | 12 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS |
12 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS | 13 |
13 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS | 14 ; SI: v_div_scale_f32 |
15 ; SI-DAG: v_div_scale_f32 | |
14 | 16 |
15 ; SI-DAG: v_rcp_f32 | 17 ; SI-DAG: v_rcp_f32 |
16 ; SI-DAG: v_mul_f32 | 18 ; SI: v_fma_f32 |
17 define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) { | 19 ; SI: v_fma_f32 |
18 entry: | 20 ; SI: v_mul_f32 |
19 %0 = fdiv float %a, %b | 21 ; SI: v_fma_f32 |
20 store float %0, float addrspace(1)* %out | 22 ; SI: v_fma_f32 |
21 ret void | 23 ; SI: v_fma_f32 |
22 } | 24 ; SI: v_div_fmas_f32 |
23 | 25 ; SI: v_div_fixup_f32 |
24 | 26 define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 { |
27 entry: | |
28 %fdiv = fdiv float %a, %b | |
29 store float %fdiv, float addrspace(1)* %out | |
30 ret void | |
31 } | |
32 | |
33 ; FUNC-LABEL: {{^}}fdiv_25ulp_f32: | |
34 ; SI: v_cndmask_b32 | |
35 ; SI: v_mul_f32 | |
36 ; SI: v_rcp_f32 | |
37 ; SI: v_mul_f32 | |
38 ; SI: v_mul_f32 | |
39 define void @fdiv_25ulp_f32(float addrspace(1)* %out, float %a, float %b) #0 { | |
40 entry: | |
41 %fdiv = fdiv float %a, %b, !fpmath !0 | |
42 store float %fdiv, float addrspace(1)* %out | |
43 ret void | |
44 } | |
45 | |
46 ; Use correct fdiv | |
47 ; FUNC-LABEL: {{^}}fdiv_25ulp_denormals_f32: | |
48 ; SI: v_fma_f32 | |
49 ; SI: v_div_fmas_f32 | |
50 ; SI: v_div_fixup_f32 | |
51 define void @fdiv_25ulp_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 { | |
52 entry: | |
53 %fdiv = fdiv float %a, %b, !fpmath !0 | |
54 store float %fdiv, float addrspace(1)* %out | |
55 ret void | |
56 } | |
57 | |
58 ; FUNC-LABEL: {{^}}fdiv_fast_denormals_f32: | |
59 ; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}} | |
60 ; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]] | |
61 ; SI-NOT: [[RESULT]] | |
62 ; SI: buffer_store_dword [[RESULT]] | |
63 define void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 { | |
64 entry: | |
65 %fdiv = fdiv fast float %a, %b | |
66 store float %fdiv, float addrspace(1)* %out | |
67 ret void | |
68 } | |
69 | |
70 ; FUNC-LABEL: {{^}}fdiv_f32_fast_math: | |
71 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W | |
72 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS | |
73 | |
74 ; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}} | |
75 ; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]] | |
76 ; SI-NOT: [[RESULT]] | |
77 ; SI: buffer_store_dword [[RESULT]] | |
78 define void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 { | |
79 entry: | |
80 %fdiv = fdiv fast float %a, %b | |
81 store float %fdiv, float addrspace(1)* %out | |
82 ret void | |
83 } | |
84 | |
85 ; FUNC-LABEL: {{^}}fdiv_f32_arcp_math: | |
86 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W | |
87 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS | |
88 | |
89 ; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}} | |
90 ; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]] | |
91 ; SI-NOT: [[RESULT]] | |
92 ; SI: buffer_store_dword [[RESULT]] | |
93 define void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) #0 { | |
94 entry: | |
95 %fdiv = fdiv arcp float %a, %b | |
96 store float %fdiv, float addrspace(1)* %out | |
97 ret void | |
98 } | |
25 | 99 |
26 ; FUNC-LABEL: {{^}}fdiv_v2f32: | 100 ; FUNC-LABEL: {{^}}fdiv_v2f32: |
27 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z | 101 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z |
28 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y | 102 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y |
29 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS | 103 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS |
30 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS | 104 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS |
31 | 105 |
32 ; SI-DAG: v_rcp_f32 | 106 ; SI: v_div_scale_f32 |
33 ; SI-DAG: v_mul_f32 | 107 ; SI: v_div_scale_f32 |
34 ; SI-DAG: v_rcp_f32 | 108 ; SI: v_div_scale_f32 |
35 ; SI-DAG: v_mul_f32 | 109 ; SI: v_div_scale_f32 |
36 define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { | 110 define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 { |
37 entry: | 111 entry: |
38 %0 = fdiv <2 x float> %a, %b | 112 %fdiv = fdiv <2 x float> %a, %b |
39 store <2 x float> %0, <2 x float> addrspace(1)* %out | 113 store <2 x float> %fdiv, <2 x float> addrspace(1)* %out |
114 ret void | |
115 } | |
116 | |
117 ; FUNC-LABEL: {{^}}fdiv_ulp25_v2f32: | |
118 ; SI: v_cmp_gt_f32 | |
119 ; SI: v_cmp_gt_f32 | |
120 define void @fdiv_ulp25_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 { | |
121 entry: | |
122 %fdiv = fdiv arcp <2 x float> %a, %b, !fpmath !0 | |
123 store <2 x float> %fdiv, <2 x float> addrspace(1)* %out | |
124 ret void | |
125 } | |
126 | |
127 ; FUNC-LABEL: {{^}}fdiv_v2f32_fast_math: | |
128 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z | |
129 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y | |
130 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS | |
131 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS | |
132 | |
133 ; SI: v_rcp_f32 | |
134 ; SI: v_rcp_f32 | |
135 define void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 { | |
136 entry: | |
137 %fdiv = fdiv fast <2 x float> %a, %b | |
138 store <2 x float> %fdiv, <2 x float> addrspace(1)* %out | |
139 ret void | |
140 } | |
141 | |
142 ; FUNC-LABEL: {{^}}fdiv_v2f32_arcp_math: | |
143 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z | |
144 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y | |
145 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS | |
146 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS | |
147 | |
148 ; SI: v_rcp_f32 | |
149 ; SI: v_rcp_f32 | |
150 define void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 { | |
151 entry: | |
152 %fdiv = fdiv arcp <2 x float> %a, %b | |
153 store <2 x float> %fdiv, <2 x float> addrspace(1)* %out | |
40 ret void | 154 ret void |
41 } | 155 } |
42 | 156 |
43 ; FUNC-LABEL: {{^}}fdiv_v4f32: | 157 ; FUNC-LABEL: {{^}}fdiv_v4f32: |
44 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} | 158 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} |
48 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS | 162 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS |
49 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS | 163 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS |
50 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS | 164 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS |
51 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS | 165 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS |
52 | 166 |
53 ; SI-DAG: v_rcp_f32 | 167 ; SI: v_div_fixup_f32 |
54 ; SI-DAG: v_mul_f32 | 168 ; SI: v_div_fixup_f32 |
55 ; SI-DAG: v_rcp_f32 | 169 ; SI: v_div_fixup_f32 |
56 ; SI-DAG: v_mul_f32 | 170 ; SI: v_div_fixup_f32 |
57 ; SI-DAG: v_rcp_f32 | 171 define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 { |
58 ; SI-DAG: v_mul_f32 | |
59 ; SI-DAG: v_rcp_f32 | |
60 ; SI-DAG: v_mul_f32 | |
61 define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { | |
62 %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 | 172 %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 |
63 %a = load <4 x float>, <4 x float> addrspace(1) * %in | 173 %a = load <4 x float>, <4 x float> addrspace(1) * %in |
64 %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr | 174 %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr |
65 %result = fdiv <4 x float> %a, %b | 175 %result = fdiv <4 x float> %a, %b |
66 store <4 x float> %result, <4 x float> addrspace(1)* %out | 176 store <4 x float> %result, <4 x float> addrspace(1)* %out |
67 ret void | 177 ret void |
68 } | 178 } |
179 | |
180 ; FUNC-LABEL: {{^}}fdiv_v4f32_fast_math: | |
181 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} | |
182 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} | |
183 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} | |
184 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} | |
185 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS | |
186 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS | |
187 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS | |
188 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS | |
189 | |
190 ; SI: v_rcp_f32 | |
191 ; SI: v_rcp_f32 | |
192 ; SI: v_rcp_f32 | |
193 ; SI: v_rcp_f32 | |
194 define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 { | |
195 %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 | |
196 %a = load <4 x float>, <4 x float> addrspace(1) * %in | |
197 %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr | |
198 %result = fdiv fast <4 x float> %a, %b | |
199 store <4 x float> %result, <4 x float> addrspace(1)* %out | |
200 ret void | |
201 } | |
202 | |
203 ; FUNC-LABEL: {{^}}fdiv_v4f32_arcp_math: | |
204 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} | |
205 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} | |
206 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} | |
207 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} | |
208 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS | |
209 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS | |
210 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS | |
211 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS | |
212 | |
213 ; SI: v_rcp_f32 | |
214 ; SI: v_rcp_f32 | |
215 ; SI: v_rcp_f32 | |
216 ; SI: v_rcp_f32 | |
217 define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 { | |
218 %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 | |
219 %a = load <4 x float>, <4 x float> addrspace(1) * %in | |
220 %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr | |
221 %result = fdiv arcp <4 x float> %a, %b | |
222 store <4 x float> %result, <4 x float> addrspace(1)* %out | |
223 ret void | |
224 } | |
225 | |
226 attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="-fp32-denormals" } | |
227 attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "target-features"="-fp32-denormals" } | |
228 attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="+fp32-denormals" } | |
229 | |
230 !0 = !{float 2.500000e+00} |