120
|
1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
100
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
2
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Declarations of the intrinsics called by the kernels in this file: the
; AMDGCN reciprocal (rcp) and the generic square root, each in a float and a
; double flavour. All four use attribute group #0.
3 declare float @llvm.amdgcn.rcp.f32(float) #0
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
4 declare double @llvm.amdgcn.rcp.f64(double) #0
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
5
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
6 declare double @llvm.sqrt.f64(double) #0
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
7 declare float @llvm.sqrt.f32(float) #0
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
8
|
120
|
; Calls the f32 rcp intrinsic on an undef operand and stores the result.
; The check below requires that no v_rcp_f32 instruction shows up after this
; kernel's label.
9 ; FUNC-LABEL: {{^}}rcp_undef_f32:
|
|
10 ; SI-NOT: v_rcp_f32
|
121
|
11 define amdgpu_kernel void @rcp_undef_f32(float addrspace(1)* %out) #1 {
|
120
|
12 %rcp = call float @llvm.amdgcn.rcp.f32(float undef)
|
100
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
13 store float %rcp, float addrspace(1)* %out, align 4
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
14 ret void
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
15 }
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
16
|
121
|
; Calls the f32 rcp intrinsic on the constant 2.0.
; Expected output: no v_rcp_f32 before a v_mov_b32 that loads the inline
; constant 0.5, i.e. the reciprocal is computed at compile time.
17 ; FUNC-LABEL: {{^}}rcp_2_f32:
|
|
18 ; SI-NOT: v_rcp_f32
|
|
19 ; SI: v_mov_b32_e32 v{{[0-9]+}}, 0.5
|
|
20 define amdgpu_kernel void @rcp_2_f32(float addrspace(1)* %out) #1 {
|
|
21 %rcp = call float @llvm.amdgcn.rcp.f32(float 2.0)
|
|
22 store float %rcp, float addrspace(1)* %out, align 4
|
|
23 ret void
|
|
24 }
|
|
25
|
|
; Calls the f32 rcp intrinsic on the constant 10.0.
; Expected output: no v_rcp_f32 before a v_mov_b32 of the literal 0x3dcccccd
; (the single-precision bit pattern closest to 0.1).
26 ; FUNC-LABEL: {{^}}rcp_10_f32:
|
|
27 ; SI-NOT: v_rcp_f32
|
|
28 ; SI: v_mov_b32_e32 v{{[0-9]+}}, 0x3dcccccd
|
|
29 define amdgpu_kernel void @rcp_10_f32(float addrspace(1)* %out) #1 {
|
|
30 %rcp = call float @llvm.amdgcn.rcp.f32(float 10.0)
|
|
31 store float %rcp, float addrspace(1)* %out, align 4
|
|
32 ret void
|
|
33 }
|
|
34
|
120
|
; Reciprocal written as a plain "fdiv float 1.0, %src", compiled with attribute
; group #1 ("unsafe-fp-math"="false", "-fp32-denormals").
; Expected output: one v_rcp_f32 reading an s-register, whose destination is
; not mentioned again until it is written out by buffer_store_dword.
35 ; FUNC-LABEL: {{^}}safe_no_fp32_denormals_rcp_f32:
|
|
36 ; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
|
|
37 ; SI-NOT: [[RESULT]]
|
|
38 ; SI: buffer_store_dword [[RESULT]]
|
121
|
39 define amdgpu_kernel void @safe_no_fp32_denormals_rcp_f32(float addrspace(1)* %out, float %src) #1 {
|
120
|
40 %rcp = fdiv float 1.0, %src
|
|
41 store float %rcp, float addrspace(1)* %out, align 4
|
|
42 ret void
|
|
43 }
|
100
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
44
|
120
|
; "fdiv float 1.0, %src" compiled with attribute group #4
; ("unsafe-fp-math"="true", "+fp32-denormals").
; Expected output: one v_rcp_f32 reading an s-register, whose destination is
; not mentioned again until it is written out by buffer_store_dword.
; NOTE(review): the kernel name says "safe" but group #4 enables unsafe FP
; math; the safe_/unsafe_ prefixes of this kernel and
; @unsafe_f32_denormals_rcp_pat_f32 look swapped relative to their attribute
; groups. Confirm intent before renaming either one.
45 ; FUNC-LABEL: {{^}}safe_f32_denormals_rcp_pat_f32:
|
|
46 ; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
|
|
47 ; SI-NOT: [[RESULT]]
|
|
48 ; SI: buffer_store_dword [[RESULT]]
|
121
|
49 define amdgpu_kernel void @safe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #4 {
|
120
|
50 %rcp = fdiv float 1.0, %src
|
|
51 store float %rcp, float addrspace(1)* %out, align 4
|
|
52 ret void
|
|
53 }
|
|
54
|
|
; "fdiv float 1.0, %src" compiled with attribute group #3
; ("unsafe-fp-math"="false", "+fp32-denormals").
; Expected output: a v_div_scale_f32 appears after this kernel's label.
; NOTE(review): the kernel name says "unsafe" but group #3 disables unsafe FP
; math; the safe_/unsafe_ prefixes of this kernel and
; @safe_f32_denormals_rcp_pat_f32 look swapped relative to their attribute
; groups. Confirm intent before renaming either one.
55 ; FUNC-LABEL: {{^}}unsafe_f32_denormals_rcp_pat_f32:
|
|
56 ; SI: v_div_scale_f32
|
121
|
57 define amdgpu_kernel void @unsafe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #3 {
|
100
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
58 %rcp = fdiv float 1.0, %src
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
59 store float %rcp, float addrspace(1)* %out, align 4
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
60 ret void
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
61 }
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
62
|
120
|
; rcp(sqrt(%src)) in f32, compiled with attribute group #1
; ("unsafe-fp-math"="false", "-fp32-denormals").
; Expected output: a v_sqrt_f32 followed by a v_rcp_f32, i.e. the two
; operations stay separate.
63 ; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f32:
|
|
64 ; SI: v_sqrt_f32_e32
|
|
65 ; SI: v_rcp_f32_e32
|
121
|
66 define amdgpu_kernel void @safe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
|
120
|
67 %sqrt = call float @llvm.sqrt.f32(float %src)
|
|
68 %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
|
|
69 store float %rcp, float addrspace(1)* %out, align 4
|
|
70 ret void
|
|
71 }
|
|
72
|
|
; rcp(sqrt(%src)) in f32, compiled with attribute group #2
; ("unsafe-fp-math"="true", "-fp32-denormals").
; Expected output: a v_rsq_f32 appears after this kernel's label.
73 ; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f32:
|
|
74 ; SI: v_rsq_f32_e32
|
121
|
75 define amdgpu_kernel void @unsafe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #2 {
|
120
|
76 %sqrt = call float @llvm.sqrt.f32(float %src)
|
|
77 %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
|
100
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
78 store float %rcp, float addrspace(1)* %out, align 4
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
79 ret void
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
80 }
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
81
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Calls the f64 rcp intrinsic on the kernel argument %src, compiled with
; attribute group #1 ("unsafe-fp-math"="false").
; Expected output: one v_rcp_f64 from an s[..] register pair into a v[..]
; pair, which is not mentioned again until buffer_store_dwordx2 writes it out.
82 ; FUNC-LABEL: {{^}}rcp_f64:
|
120
|
83 ; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
|
|
84 ; SI-NOT: [[RESULT]]
|
|
85 ; SI: buffer_store_dwordx2 [[RESULT]]
|
121
|
86 define amdgpu_kernel void @rcp_f64(double addrspace(1)* %out, double %src) #1 {
|
120
|
87 %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
|
|
88 store double %rcp, double addrspace(1)* %out, align 8
|
|
89 ret void
|
|
90 }
|
|
91
|
|
; Same IR as @rcp_f64 (f64 rcp intrinsic on %src) but compiled with attribute
; group #2 ("unsafe-fp-math"="true").
; Expected output is the same shape: one v_rcp_f64 from an s[..] pair into a
; v[..] pair, stored by buffer_store_dwordx2 with no use in between.
92 ; FUNC-LABEL: {{^}}unsafe_rcp_f64:
|
|
93 ; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
|
|
94 ; SI-NOT: [[RESULT]]
|
|
95 ; SI: buffer_store_dwordx2 [[RESULT]]
|
121
|
96 define amdgpu_kernel void @unsafe_rcp_f64(double addrspace(1)* %out, double %src) #2 {
|
120
|
97 %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
|
100
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
98 store double %rcp, double addrspace(1)* %out, align 8
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
99 ret void
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
100 }
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
101
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Reciprocal written as "fdiv double 1.0, %src", compiled with attribute
; group #1 ("unsafe-fp-math"="false").
; Expected output: a v_div_scale_f64 appears after this kernel's label.
102 ; FUNC-LABEL: {{^}}rcp_pat_f64:
|
120
|
103 ; SI: v_div_scale_f64
|
121
|
104 define amdgpu_kernel void @rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
|
100
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
105 %rcp = fdiv double 1.0, %src
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
106 store double %rcp, double addrspace(1)* %out, align 8
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
107 ret void
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
108 }
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
109
|
120
|
; "fdiv double 1.0, %src" compiled with attribute group #2
; ("unsafe-fp-math"="true").
; Expected output: one v_rcp_f64 from an s[..] register pair into a v[..]
; pair, which is not mentioned again until buffer_store_dwordx2 writes it out.
110 ; FUNC-LABEL: {{^}}unsafe_rcp_pat_f64:
|
|
111 ; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
|
|
112 ; SI-NOT: [[RESULT]]
|
|
113 ; SI: buffer_store_dwordx2 [[RESULT]]
|
121
|
114 define amdgpu_kernel void @unsafe_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
|
120
|
115 %rcp = fdiv double 1.0, %src
|
|
116 store double %rcp, double addrspace(1)* %out, align 8
|
|
117 ret void
|
|
118 }
|
|
119
|
|
; rcp(sqrt(%src)) in f64, compiled with attribute group #1
; ("unsafe-fp-math"="false").
; Expected output: no v_rsq_f64_e32 ahead of the v_sqrt_f64, and the
; v_sqrt_f64 is followed by a v_rcp_f64.
120 ; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f64:
|
|
121 ; SI-NOT: v_rsq_f64_e32
|
|
122 ; SI: v_sqrt_f64
|
|
123 ; SI: v_rcp_f64
|
121
|
124 define amdgpu_kernel void @safe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
|
120
|
125 %sqrt = call double @llvm.sqrt.f64(double %src)
|
|
126 %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
|
|
127 store double %rcp, double addrspace(1)* %out, align 8
|
|
128 ret void
|
|
129 }
|
|
130
|
|
; rcp(sqrt(%src)) in f64, compiled with attribute group #2
; ("unsafe-fp-math"="true").
; Expected output: one v_rsq_f64_e32 from an s[..] register pair into a v[..]
; pair, which is not mentioned again until buffer_store_dwordx2 writes it out.
131 ; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f64:
|
|
132 ; SI: v_rsq_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
|
|
133 ; SI-NOT: [[RESULT]]
|
|
134 ; SI: buffer_store_dwordx2 [[RESULT]]
|
121
|
135 define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
|
120
|
136 %sqrt = call double @llvm.sqrt.f64(double %src)
|
|
137 %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
|
100
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
138 store double %rcp, double addrspace(1)* %out, align 8
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
139 ret void
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
140 }
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
141
|
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Attribute groups referenced above.
;   #0     - used by the intrinsic declarations.
;   #1..#4 - the four combinations of "unsafe-fp-math" (false/true) with the
;            fp32-denormals target feature disabled (-) or enabled (+):
;              #1 = false, -    #2 = true, -    #3 = false, +    #4 = true, +
142 attributes #0 = { nounwind readnone }
|
120
|
143 attributes #1 = { nounwind "unsafe-fp-math"="false" "target-features"="-fp32-denormals" }
|
|
144 attributes #2 = { nounwind "unsafe-fp-math"="true" "target-features"="-fp32-denormals" }
|
|
145 attributes #3 = { nounwind "unsafe-fp-math"="false" "target-features"="+fp32-denormals" }
|
|
146 attributes #4 = { nounwind "unsafe-fp-math"="true" "target-features"="+fp32-denormals" }
|