120
|
1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN %s
|
|
2 ; RUN: llc -march=amdgcn -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN %s
|
|
3
|
|
4 declare i32 @llvm.amdgcn.workitem.id.x() #0
|
|
5 declare float @llvm.minnum.f32(float, float) #0
|
|
6 declare float @llvm.maxnum.f32(float, float) #0
|
|
7 declare double @llvm.minnum.f64(double, double) #0
|
|
8 declare double @llvm.maxnum.f64(double, double) #0
|
|
9
|
|
10 ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_f32:
|
|
11 ; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
|
|
12
|
|
13 ; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
|
|
14 ; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
|
|
; Baseline case: loads one float from %aptr at the index returned by
; @llvm.amdgcn.workitem.id.x and computes minnum(maxnum(a, 2.0), 4.0), with the
; loaded value as the first operand of maxnum and the maxnum result as the
; first operand of minnum. The checks preceding this function expect a single
; v_med3_f32 from the llc run without +fp-exceptions and a separate
; v_max_f32/v_min_f32 pair from the run with +fp-exceptions.
define void @v_test_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  ; Per-invocation index used for both the input and the output element.
  %idx = call i32 @llvm.amdgcn.workitem.id.x()
  %in.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %idx
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %idx
  %val = load float, float addrspace(1)* %in.ptr

  ; Lower bound 2.0 first, then upper bound 4.0.
  %lower = call float @llvm.maxnum.f32(float %val, float 2.0)
  %result = call float @llvm.minnum.f32(float %lower, float 4.0)

  store float %result, float addrspace(1)* %out.ptr
  ret void
}
|
|
27
|
|
28 ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_commute0_f32:
|
|
29 ; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
|
|
30
|
|
31 ; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
|
|
32 ; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
|
|
; Commuted variant: same computation as minnum(maxnum(a, 2.0), 4.0), but the
; constant is passed as the FIRST operand of both maxnum and minnum. The checks
; preceding this function expect the same output as the non-commuted form
; (v_med3_f32 without +fp-exceptions, v_max_f32/v_min_f32 with it).
define void @v_test_fmed3_r_i_i_commute0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  ; Per-invocation index used for both the input and the output element.
  %idx = call i32 @llvm.amdgcn.workitem.id.x()
  %in.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %idx
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %idx
  %val = load float, float addrspace(1)* %in.ptr

  ; Constant operands deliberately come first in both calls.
  %lower = call float @llvm.maxnum.f32(float 2.0, float %val)
  %result = call float @llvm.minnum.f32(float 4.0, float %lower)

  store float %result, float addrspace(1)* %out.ptr
  ret void
}
|
|
45
|
|
46 ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_commute1_f32:
|
|
47 ; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
|
|
48
|
|
49 ; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
|
|
50 ; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
|
|
; Partially commuted variant: maxnum takes the loaded value first and the
; constant second, while minnum takes the constant first and the maxnum result
; second. The checks preceding this function expect the same output as the
; non-commuted form (v_med3_f32 without +fp-exceptions, v_max_f32/v_min_f32
; with it).
define void @v_test_fmed3_r_i_i_commute1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  ; Per-invocation index used for both the input and the output element.
  %idx = call i32 @llvm.amdgcn.workitem.id.x()
  %in.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %idx
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %idx
  %val = load float, float addrspace(1)* %in.ptr

  ; Only the minnum call has its operands swapped.
  %lower = call float @llvm.maxnum.f32(float %val, float 2.0)
  %result = call float @llvm.minnum.f32(float 4.0, float %lower)

  store float %result, float addrspace(1)* %out.ptr
  ret void
}
|
|
63
|
|
64 ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_constant_order_f32:
|
|
65 ; GCN: v_max_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
|
|
66 ; GCN: v_min_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
|
|
; Swapped-constant variant: the maxnum constant (4.0) is larger than the minnum
; constant (2.0), i.e. minnum(maxnum(a, 4.0), 2.0). The checks preceding this
; function use the prefix shared by both llc invocations and expect a separate
; v_max_f32 followed by a v_min_f32.
define void @v_test_fmed3_r_i_i_constant_order_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  ; Per-invocation index used for both the input and the output element.
  %idx = call i32 @llvm.amdgcn.workitem.id.x()
  %in.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %idx
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %idx
  %val = load float, float addrspace(1)* %in.ptr

  ; Note the constants: 4.0 feeds maxnum and 2.0 feeds minnum.
  %maxed = call float @llvm.maxnum.f32(float %val, float 4.0)
  %result = call float @llvm.minnum.f32(float %maxed, float 2.0)

  store float %result, float addrspace(1)* %out.ptr
  ret void
}
|
|
79
|
|
80
|
|
81 ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_multi_use_f32:
|
|
82 ; GCN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
|
|
83 ; GCN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
|
|
; Multi-use variant: computes minnum(maxnum(a, 2.0), 4.0) like the baseline
; test, but the intermediate maxnum result is also stored, so it has a second
; user besides minnum. The checks preceding this function use the prefix shared
; by both llc invocations and expect a separate v_max_f32 followed by a
; v_min_f32.
define void @v_test_fmed3_r_i_i_multi_use_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  ; Per-invocation index used for both the input and the output element.
  %idx = call i32 @llvm.amdgcn.workitem.id.x()
  %in.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %idx
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %idx
  %val = load float, float addrspace(1)* %in.ptr

  %lower = call float @llvm.maxnum.f32(float %val, float 2.0)
  %result = call float @llvm.minnum.f32(float %lower, float 4.0)

  ; Both stores are volatile and target the same address; the final result is
  ; written first, then the intermediate maxnum value.
  store volatile float %result, float addrspace(1)* %out.ptr
  store volatile float %lower, float addrspace(1)* %out.ptr
  ret void
}
|
|
97
|
|
98 ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_f64:
|
|
99 ; GCN: v_max_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 2.0
|
|
100 ; GCN: v_min_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 4.0
|
|
; Double-precision variant: minnum(maxnum(a, 2.0), 4.0) on f64 values using the
; f64 intrinsics. The checks preceding this function use the prefix shared by
; both llc invocations and expect a v_max_f64 followed by a v_min_f64.
define void @v_test_fmed3_r_i_i_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #1 {
  ; Per-invocation index used for both the input and the output element.
  %idx = call i32 @llvm.amdgcn.workitem.id.x()
  %in.ptr = getelementptr double, double addrspace(1)* %aptr, i32 %idx
  %out.ptr = getelementptr double, double addrspace(1)* %out, i32 %idx
  %val = load double, double addrspace(1)* %in.ptr

  ; Lower bound 2.0 first, then upper bound 4.0.
  %lower = call double @llvm.maxnum.f64(double %val, double 2.0)
  %result = call double @llvm.minnum.f64(double %lower, double 4.0)

  store double %result, double addrspace(1)* %out.ptr
  ret void
}
|
|
113
|
|
114 ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_no_nans_f32:
|
|
115 ; GCN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
|
|
; No-NaNs variant: same IR body as the baseline f32 test, but the function uses
; attribute group #2, which sets "no-nans-fp-math"="true". The checks preceding
; this function use the prefix shared by both llc invocations and expect a
; single v_med3_f32.
define void @v_test_fmed3_r_i_i_no_nans_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  ; Per-invocation index used for both the input and the output element.
  %idx = call i32 @llvm.amdgcn.workitem.id.x()
  %in.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %idx
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %idx
  %val = load float, float addrspace(1)* %in.ptr

  ; Lower bound 2.0 first, then upper bound 4.0.
  %lower = call float @llvm.maxnum.f32(float %val, float 2.0)
  %result = call float @llvm.minnum.f32(float %lower, float 4.0)

  store float %result, float addrspace(1)* %out.ptr
  ret void
}
|
|
128
|
|
129 ; GCN-LABEL: {{^}}v_test_legacy_fmed3_r_i_i_f32:
|
|
130 ; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
|
|
131
|
|
132 ; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
|
|
133 ; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
|
|
; Compare+select variant: expresses the max and min steps with fcmp/select
; pairs instead of the maxnum/minnum intrinsics. The checks preceding this
; function expect the same output as the intrinsic-based baseline (v_med3_f32
; without +fp-exceptions, v_max_f32/v_min_f32 with it).
define void @v_test_legacy_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  ; Per-invocation index used for both the input and the output element.
  %idx = call i32 @llvm.amdgcn.workitem.id.x()
  %in.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %idx
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %idx
  %val = load float, float addrspace(1)* %in.ptr

  ; fmax_legacy pattern: pick 2.0 when the value is unordered or <= 2.0,
  ; otherwise keep the value.
  %le.lo = fcmp ule float %val, 2.0
  %lower = select i1 %le.lo, float 2.0, float %val

  ; fmin_legacy pattern: pick 4.0 when the intermediate is unordered or >= 4.0,
  ; otherwise keep the intermediate.
  %ge.hi = fcmp uge float %lower, 4.0
  %result = select i1 %ge.hi, float 4.0, float %lower

  store float %result, float addrspace(1)* %out.ptr
  ret void
}
|
|
151
|
|
; #0 is attached to the intrinsic declarations at the top of this file.
attributes #0 = { nounwind readnone }
; #1 is attached to the test functions that keep "no-nans-fp-math" disabled.
attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
; #2 differs from #1 only in enabling "no-nans-fp-math"; it is attached to
; @v_test_fmed3_r_i_i_no_nans_f32.
attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
|