annotate test/CodeGen/AMDGPU/fmed3.ll @ 146:3fc4d5c3e21e

set tail call flag for code segment in CGCAll
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sun, 23 Dec 2018 19:23:36 +0900
parents 3a76565eade5
children c2174574ed3a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
134
3a76565eade5 update 5.0.1
mir3636
parents: 121
diff changeset
1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=SI %s
3a76565eade5 update 5.0.1
mir3636
parents: 121
diff changeset
2 ; RUN: llc -march=amdgcn -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SNAN -check-prefix=GCN -check-prefix=SI %s
3a76565eade5 update 5.0.1
mir3636
parents: 121
diff changeset
3 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
3a76565eade5 update 5.0.1
mir3636
parents: 121
diff changeset
4 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
3a76565eade5 update 5.0.1
mir3636
parents: 121
diff changeset
5 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
3a76565eade5 update 5.0.1
mir3636
parents: 121
diff changeset
6 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
7
120
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
8
; Test: clamp whose input comes from an `fadd nnan`.
; The kernel loads aptr[tid], adds 1.0 with the nnan flag, applies
; maxnum(x, 2.0) followed by minnum(x, 4.0), and stores the result to out[tid].
; The checks use only the GCN prefix, so the same expectation (v_add_f32
; feeding a single v_med3_f32 with 2.0 and 4.0) applies to every RUN line
; that passes -check-prefix=GCN, including the SNAN configurations.
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
9 ; GCN-LABEL: {{^}}v_test_nnan_input_fmed3_r_i_i_f32:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
10 ; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, v{{[0-9]+}}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
11 ; GCN: v_med3_f32 v{{[0-9]+}}, [[ADD]], 2.0, 4.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
12 define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
13 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
14 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
15 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
16 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
17 %a.add = fadd nnan float %a, 1.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
18 %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
19 %med = call float @llvm.minnum.f32(float %max, float 4.0)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
20
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
21 store float %med, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
22 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
23 }
120
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
24
; Test: basic register/immediate/immediate clamp on a plainly loaded value.
; The kernel computes minnum(maxnum(aptr[tid], 2.0), 4.0) and stores it to
; out[tid]; no fast-math flags are present on the calls.
; NOSNAN runs expect one v_med3_f32 with 2.0 and 4.0; SNAN runs (the RUN lines
; that add -mattr=+fp-exceptions) expect a separate v_max_f32 and v_min_f32.
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
25 ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_f32:
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
26 ; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
27
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
28 ; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
29 ; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
30 define amdgpu_kernel void @v_test_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
31 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
32 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
33 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
34 %a = load float, float addrspace(1)* %gep0
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
35
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
36 %max = call float @llvm.maxnum.f32(float %a, float 2.0)
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
37 %med = call float @llvm.minnum.f32(float %max, float 4.0)
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
38
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
39 store float %med, float addrspace(1)* %outgep
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
40 ret void
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
41 }
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
42
; Test: same 2.0/4.0 clamp, but with the constant written as the FIRST operand
; of both the maxnum and the minnum call.
; The expected output is the same as for the non-commuted form: NOSNAN runs
; expect one v_med3_f32 with 2.0 and 4.0, SNAN runs expect v_max_f32 then
; v_min_f32.
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
43 ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_commute0_f32:
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
44 ; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
45
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
46 ; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
47 ; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
48 define amdgpu_kernel void @v_test_fmed3_r_i_i_commute0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
49 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
50 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
51 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
52 %a = load float, float addrspace(1)* %gep0
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
53
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
54 %max = call float @llvm.maxnum.f32(float 2.0, float %a)
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
55 %med = call float @llvm.minnum.f32(float 4.0, float %max)
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
56
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
57 store float %med, float addrspace(1)* %outgep
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
58 ret void
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
59 }
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
60
; Test: same 2.0/4.0 clamp with only the minnum commuted: the maxnum call
; takes (%a, 2.0) while the minnum call takes (4.0, %max).
; NOSNAN runs expect one v_med3_f32 with 2.0 and 4.0; SNAN runs expect
; v_max_f32 then v_min_f32.
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
61 ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_commute1_f32:
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
62 ; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
63
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
64 ; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
65 ; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
66 define amdgpu_kernel void @v_test_fmed3_r_i_i_commute1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
67 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
68 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
69 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
70 %a = load float, float addrspace(1)* %gep0
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
71
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
72 %max = call float @llvm.maxnum.f32(float %a, float 2.0)
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
73 %med = call float @llvm.minnum.f32(float 4.0, float %max)
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
74
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
75 store float %med, float addrspace(1)* %outgep
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
76 ret void
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
77 }
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
78
; Test: the two constants are swapped relative to the other clamp tests:
; maxnum is taken against 4.0 and minnum against 2.0, so the max bound is
; larger than the min bound.
; The GCN prefix (used by the NOSNAN and SNAN runs alike) expects a v_max_f32
; with 4.0 followed by a v_min_f32 with 2.0; no v_med3_f32 is checked for.
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
79 ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_constant_order_f32:
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
80 ; GCN: v_max_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
81 ; GCN: v_min_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
82 define amdgpu_kernel void @v_test_fmed3_r_i_i_constant_order_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
83 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
84 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
85 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
86 %a = load float, float addrspace(1)* %gep0
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
87
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
88 %max = call float @llvm.maxnum.f32(float %a, float 4.0)
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
89 %med = call float @llvm.minnum.f32(float %max, float 2.0)
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
90
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
91 store float %med, float addrspace(1)* %outgep
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
92 ret void
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
93 }
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
94
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
95
; Test: 2.0/4.0 clamp where the intermediate %max has a second use.
; Both %med and %max are written to out[tid] with volatile stores, so the
; maxnum result is needed on its own as well as inside the minnum.
; The GCN prefix (all runs) expects a separate v_max_f32 and v_min_f32; no
; v_med3_f32 is checked for.
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
96 ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_multi_use_f32:
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
97 ; GCN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
98 ; GCN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
99 define amdgpu_kernel void @v_test_fmed3_r_i_i_multi_use_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
100 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
101 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
102 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
103 %a = load float, float addrspace(1)* %gep0
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
104
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
105 %max = call float @llvm.maxnum.f32(float %a, float 2.0)
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
106 %med = call float @llvm.minnum.f32(float %max, float 4.0)
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
107
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
108 store volatile float %med, float addrspace(1)* %outgep
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
109 store volatile float %max, float addrspace(1)* %outgep
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
110 ret void
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
111 }
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
112
; Test: the 2.0/4.0 clamp on double instead of float, using
; llvm.maxnum.f64 / llvm.minnum.f64 on aptr[tid].
; The GCN prefix (all runs) expects v_max_f64 with 2.0 followed by v_min_f64
; with 4.0 on 64-bit register pairs; no med3 instruction is checked for.
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
113 ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_f64:
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
114 ; GCN: v_max_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 2.0
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
115 ; GCN: v_min_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 4.0
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
116 define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
117 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
118 %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
119 %outgep = getelementptr double, double addrspace(1)* %out, i32 %tid
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
120 %a = load double, double addrspace(1)* %gep0
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
121
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
122 %max = call double @llvm.maxnum.f64(double %a, double 2.0)
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
123 %med = call double @llvm.minnum.f64(double %max, double 4.0)
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
124
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
125 store double %med, double addrspace(1)* %outgep
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
126 ret void
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
127 }
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
128
; Test: the same IR body as the basic 2.0/4.0 float clamp, but the kernel uses
; attribute group #2 instead of #1.
; NOTE(review): the definition of #2 is not visible in this part of the file;
; going by the test name it presumably enables no-NaNs FP math -- confirm
; against the attribute list at the end of the file.
; The GCN prefix expects one v_med3_f32 with 2.0 and 4.0 in every run,
; including the SNAN configurations.
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
129 ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_no_nans_f32:
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
130 ; GCN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
131 define amdgpu_kernel void @v_test_fmed3_r_i_i_no_nans_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
132 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
133 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
134 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
135 %a = load float, float addrspace(1)* %gep0
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
136
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
137 %max = call float @llvm.maxnum.f32(float %a, float 2.0)
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
138 %med = call float @llvm.minnum.f32(float %max, float 4.0)
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
139
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
140 store float %med, float addrspace(1)* %outgep
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
141 ret void
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
142 }
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
143
; Test: the 2.0/4.0 clamp written with fcmp + select instead of the
; llvm.maxnum / llvm.minnum intrinsics.
; The max step selects 2.0 when `%a ule 2.0` (unordered or <=), else %a; the
; min step selects 4.0 when `%max uge 4.0` (unordered or >=), else %max.
; NOSNAN runs expect one v_med3_f32 with 2.0 and 4.0; SNAN runs expect
; v_max_f32 then v_min_f32.
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
144 ; GCN-LABEL: {{^}}v_test_legacy_fmed3_r_i_i_f32:
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
145 ; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
146
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
147 ; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
148 ; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
149 define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
150 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
151 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
152 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
153 %a = load float, float addrspace(1)* %gep0
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
154
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
155 ; fmax_legacy
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
156 %cmp0 = fcmp ule float %a, 2.0
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
157 %max = select i1 %cmp0, float 2.0, float %a
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
158
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
159 ; fmin_legacy
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
160 %cmp1 = fcmp uge float %max, 4.0
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
161 %med = select i1 %cmp1, float 4.0, float %max
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
162
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
163 store float %med, float addrspace(1)* %outgep
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
164 ret void
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
165 }
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
166
; Test: three-register median pattern with a negated FIRST operand.
; a, b and c are volatile loads from aptr/bptr/cptr at index tid; a is negated
; with `fsub -0.0, %a`, then the kernel computes
;   max(min(-a, b), min(max(-a, b), c))
; via minnum/maxnum calls and stores it to out[tid].
; The GCN prefix (all runs) expects one v_med3_f32 with the negation folded in
; as a source modifier on A.
; NOTE(review): the kernel uses attribute group #2, whose definition is not
; visible in this part of the file.
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
167 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod0:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
168 ; GCN: {{buffer_|flat_|global_}}load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
169 ; GCN: {{buffer_|flat_|global_}}load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
170 ; GCN: {{buffer_|flat_|global_}}load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
171 ; GCN: v_med3_f32 v{{[0-9]+}}, -[[A]], [[B]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
172 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
173 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
174 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
175 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
176 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
177 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
178 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
179 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
180 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
181 %a.fneg = fsub float -0.0, %a
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
182 %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
183 %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
184 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
185 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
186 store float %med3, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
187 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
188 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
189
; Test: three-register median pattern with a negated SECOND operand.
; a, b and c are volatile loads; b is negated with `fsub -0.0, %b`, then the
; kernel computes max(min(a, -b), min(max(a, -b), c)) and stores it to
; out[tid].
; The GCN prefix (all runs) expects one v_med3_f32 with the negation folded in
; as a source modifier on B.
; NOTE(review): the kernel uses attribute group #2, whose definition is not
; visible in this part of the file.
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
190 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod1:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
191 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
192 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
193 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
194 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], -[[B]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
195 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
196 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
197 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
198 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
199 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
200 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
201 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
202 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
203 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
204 %b.fneg = fsub float -0.0, %b
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
205 %tmp0 = call float @llvm.minnum.f32(float %a, float %b.fneg)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
206 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b.fneg)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
207 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
208 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
209 store float %med3, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
210 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
211 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
212
; Test: three-register median pattern with a negated THIRD operand.
; a, b and c are volatile loads; c is negated with `fsub -0.0, %c`, then the
; kernel computes max(min(a, b), min(max(a, b), -c)) and stores it to
; out[tid].
; The GCN prefix (all runs) expects one v_med3_f32 with the negation folded in
; as a source modifier on C.
; NOTE(review): the kernel uses attribute group #2, whose definition is not
; visible in this part of the file.
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
213 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod2:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
214 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
215 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
216 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
217 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], -[[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
218 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
219 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
220 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
221 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
222 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
223 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
224 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
225 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
226 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
227 %c.fneg = fsub float -0.0, %c
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
228 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
229 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
230 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fneg)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
231 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
232 store float %med3, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
233 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
234 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
235
; Test: three-register median pattern with a different modifier on each
; operand: a is negated, b goes through llvm.fabs, and c goes through
; llvm.fabs and is then negated.
; The kernel computes max(min(-a, |b|), min(max(-a, |b|), -|c|)) from three
; volatile loads and stores it to out[tid].
; The GCN prefix (all runs) expects one v_med3_f32 with operands
; -A, |B|, -|C|, i.e. all three modifiers folded into the instruction.
; NOTE(review): the kernel uses attribute group #2, whose definition is not
; visible in this part of the file.
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
236 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod012:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
237 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
238 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
239 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
240 ; GCN: v_med3_f32 v{{[0-9]+}}, -[[A]], |[[B]]|, -|[[C]]|
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
241 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
242 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
243 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
244 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
245 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
246 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
247 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
248 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
249 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
250
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
251 %a.fneg = fsub float -0.0, %a
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
252 %b.fabs = call float @llvm.fabs.f32(float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
253 %c.fabs = call float @llvm.fabs.f32(float %c)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
254 %c.fabs.fneg = fsub float -0.0, %c.fabs
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
255
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
256 %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b.fabs)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
257 %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b.fabs)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
258 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
259 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
260
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
261 store float %med3, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
262 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
263 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
264
; Test: three-register median pattern where ALL three operands are
; negated absolute values (llvm.fabs followed by `fsub -0.0, x`).
; The kernel computes max(min(-|a|, -|b|), min(max(-|a|, -|b|), -|c|)) from
; three volatile loads and stores it to out[tid].
; The GCN prefix (all runs) expects one v_med3_f32 with operands
; -|A|, -|B|, -|C|.
; NOTE(review): the kernel uses attribute group #2, whose definition is not
; visible in this part of the file.
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
265 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_negabs012:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
266 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
267 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
268 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
269 ; GCN: v_med3_f32 v{{[0-9]+}}, -|[[A]]|, -|[[B]]|, -|[[C]]|
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
270 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
271 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
272 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
273 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
274 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
275 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
276 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
277 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
278 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
279
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
280 %a.fabs = call float @llvm.fabs.f32(float %a)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
281 %a.fabs.fneg = fsub float -0.0, %a.fabs
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
282 %b.fabs = call float @llvm.fabs.f32(float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
283 %b.fabs.fneg = fsub float -0.0, %b.fabs
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
284 %c.fabs = call float @llvm.fabs.f32(float %c)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
285 %c.fabs.fneg = fsub float -0.0, %c.fabs
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
286
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
287 %tmp0 = call float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
288 %tmp1 = call float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
289 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
290 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
291
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
292 store float %med3, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
293 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
294 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
295
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
296 ; GCN-LABEL: {{^}}v_nnan_inputs_med3_f32_pat0:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
297 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
298 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
299 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
300 ; GCN-DAG: v_add_f32_e32 [[A_ADD:v[0-9]+]], 1.0, [[A]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
301 ; GCN-DAG: v_add_f32_e32 [[B_ADD:v[0-9]+]], 2.0, [[B]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
302 ; GCN-DAG: v_add_f32_e32 [[C_ADD:v[0-9]+]], 4.0, [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
303 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A_ADD]], [[B_ADD]], [[C_ADD]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
304 define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
305 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
306 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
307 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
308 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
309 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
310 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
311 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
312 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
313
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
314 %a.nnan = fadd nnan float %a, 1.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
315 %b.nnan = fadd nnan float %b, 2.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
316 %c.nnan = fadd nnan float %c, 4.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
317
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
318 %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
319 %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
320 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
321 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
322 store float %med3, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
323 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
324 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
325
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
326 ; 16 combinations
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
327
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
328 ; 0: max(min(x, y), min(max(x, y), z))
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
329 ; 1: max(min(x, y), min(max(y, x), z))
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
330 ; 2: max(min(x, y), min(z, max(x, y)))
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
331 ; 3: max(min(x, y), min(z, max(y, x)))
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
332 ; 4: max(min(y, x), min(max(x, y), z))
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
333 ; 5: max(min(y, x), min(max(y, x), z))
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
334 ; 6: max(min(y, x), min(z, max(x, y)))
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
335 ; 7: max(min(y, x), min(z, max(y, x)))
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
336 ;
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
337 ; 8-15: patterns 0-7 again, with the operands of the outermost max commuted
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
338
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
339 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
340 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
341 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
342 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
343 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
344 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
345 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
346 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
347 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
348 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
349 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
350 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
351 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
352 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
353 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
354 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
355 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
356 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
357 store float %med3, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
358 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
359 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
360
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
361 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat1:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
362 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
363 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
364 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
365 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
366 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
367 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
368 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
369 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
370 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
371 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
372 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
373 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
374 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
375 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
376 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
377 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
378 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
379 store float %med3, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
380 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
381 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
382
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
383 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat2:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
384 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
385 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
386 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
387 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
388 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
389 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
390 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
391 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
392 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
393 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
394 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
395 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
396 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
397 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
398 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
399 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
400 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
401 store float %med3, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
402 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
403 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
404
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
405 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat3:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
406 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
407 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
408 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
409 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
410 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
411 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
412 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
413 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
414 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
415 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
416 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
417 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
418 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
419 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
420 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
421 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
422 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
423 store float %med3, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
424 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
425 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
426
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
427 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat4:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
428 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
429 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
430 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
431 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
432 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
433 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
434 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
435 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
436 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
437 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
438 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
439 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
440 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
441 %tmp0 = call float @llvm.minnum.f32(float %b, float %a) ; min(y, x)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
442 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) ; max(x, y): combination 4 of the table above (max(y, x) here would repeat combination 7)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
443 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) ; min(max(x, y), z): the inner max is the first operand in combination 4
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
444 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) ; outermost max, not commuted
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
445 store float %med3, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
446 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
447 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
448
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
449 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat5:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
450 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
451 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
452 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
453 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
454 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat5(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
455 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
456 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
457 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
458 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
459 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
460 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
461 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
462 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
463 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
464 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
465 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
466 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
467 store float %med3, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
468 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
469 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
470
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
471 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat6:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
472 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
473 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
474 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
475 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
476 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat6(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
477 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
478 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
479 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
480 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
481 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
482 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
483 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
484 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
485 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
486 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
487 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
488 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
489 store float %med3, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
490 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
491 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
492
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
493 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat7:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
494 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
495 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
496 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
497 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
498 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat7(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
499 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
500 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
501 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
502 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
503 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
504 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
505 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
506 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
507 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
508 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
509 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
510 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
511 store float %med3, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
512 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
513 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
514
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
515 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat8:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
516 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
517 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
518 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
519 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
520 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat8(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
521 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
522 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
523 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
524 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
525 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
526 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
527 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
528 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
529 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
530 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
531 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
532 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
533 store float %med3, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
534 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
535 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
536
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
537 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat9:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
538 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
539 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
540 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
541 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
542 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat9(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
543 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
544 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
545 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
546 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
547 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
548 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
549 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
550 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
551 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
552 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
553 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
554 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
555 store float %med3, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
556 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
557 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
558
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
559 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat10:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
560 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
561 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
562 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
563 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
564 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat10(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
565 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
566 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
567 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
568 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
569 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
570 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
571 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
572 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
573 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
574 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
575 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
576 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
577 store float %med3, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
578 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
579 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
580
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; pat11 computes maxnum(minnum(%c, maxnum(%b, %a)), minnum(%a, %b)).
; The checks below expect one v_med3_f32 with operands B, A, C.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat11:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat11(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  ; Every buffer is indexed by the workitem id in x.
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %id.x
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %id.x
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %id.x
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %id.x
  ; Volatile loads: the A/B/C captures above bind in this order.
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr
  %lo = call float @llvm.minnum.f32(float %a, float %b)
  ; Inner maxnum has its operands commuted (b, a).
  %hi = call float @llvm.maxnum.f32(float %b, float %a)
  %hi.capped = call float @llvm.minnum.f32(float %c, float %hi)
  %result = call float @llvm.maxnum.f32(float %hi.capped, float %lo)
  store float %result, float addrspace(1)* %out.addr
  ret void
}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
602
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; pat12 computes maxnum(minnum(%c, maxnum(%b, %a)), minnum(%b, %a)).
; The checks below expect one v_med3_f32 with operands B, A, C.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat12:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat12(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  ; Every buffer is indexed by the workitem id in x.
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %id.x
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %id.x
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %id.x
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %id.x
  ; Volatile loads: the A/B/C captures above bind in this order.
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr
  ; Both the inner minnum and the inner maxnum take (b, a).
  %lo = call float @llvm.minnum.f32(float %b, float %a)
  %hi = call float @llvm.maxnum.f32(float %b, float %a)
  %hi.capped = call float @llvm.minnum.f32(float %c, float %hi)
  %result = call float @llvm.maxnum.f32(float %hi.capped, float %lo)
  store float %result, float addrspace(1)* %out.addr
  ret void
}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
624
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; pat13 computes maxnum(minnum(maxnum(%b, %a), %c), minnum(%b, %a)).
; The checks below expect one v_med3_f32 with operands B, A, C.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat13:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat13(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  ; Every buffer is indexed by the workitem id in x.
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %id.x
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %id.x
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %id.x
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %id.x
  ; Volatile loads: the A/B/C captures above bind in this order.
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr
  %lo = call float @llvm.minnum.f32(float %b, float %a)
  %hi = call float @llvm.maxnum.f32(float %b, float %a)
  ; Here the max result is the first minnum operand and %c the second.
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c)
  %result = call float @llvm.maxnum.f32(float %hi.capped, float %lo)
  store float %result, float addrspace(1)* %out.addr
  ret void
}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
646
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; pat14 computes maxnum(minnum(%c, maxnum(%a, %b)), minnum(%b, %a)).
; The checks below expect one v_med3_f32 with operands A, B, C.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat14:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat14(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  ; Every buffer is indexed by the workitem id in x.
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %id.x
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %id.x
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %id.x
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %id.x
  ; Volatile loads: the A/B/C captures above bind in this order.
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr
  ; minnum takes (b, a) while maxnum takes (a, b).
  %lo = call float @llvm.minnum.f32(float %b, float %a)
  %hi = call float @llvm.maxnum.f32(float %a, float %b)
  %hi.capped = call float @llvm.minnum.f32(float %c, float %hi)
  %result = call float @llvm.maxnum.f32(float %hi.capped, float %lo)
  store float %result, float addrspace(1)* %out.addr
  ret void
}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
668
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; pat15 computes maxnum(minnum(%c, maxnum(%b, %a)), minnum(%b, %a)).
; The checks below expect one v_med3_f32 with operands B, A, C.
; NOTE(review): this operand order is the same as in
; @v_test_global_nnans_med3_f32_pat12 in this file -- confirm whether a
; different commutation was intended for one of the two.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat15:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat15(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  ; Every buffer is indexed by the workitem id in x.
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %id.x
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %id.x
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %id.x
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %id.x
  ; Volatile loads: the A/B/C captures above bind in this order.
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr
  %lo = call float @llvm.minnum.f32(float %b, float %a)
  %hi = call float @llvm.maxnum.f32(float %b, float %a)
  %hi.capped = call float @llvm.minnum.f32(float %c, float %hi)
  %result = call float @llvm.maxnum.f32(float %hi.capped, float %lo)
  store float %result, float addrspace(1)* %out.addr
  ret void
}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
690
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
691 ; ---------------------------------------------------------------------
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
692 ; Negative patterns
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
693 ; ---------------------------------------------------------------------
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
694
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; pat0 shape, maxnum(minnum(%a, %b), minnum(maxnum(%a, %b), %c)), where the
; inner minnum(%a, %b) is additionally stored, giving it a second use.
; The checks below expect separate v_min_f32 / v_max_f32 instructions.
; Attribute group #1 is declared outside this chunk.
; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use0:
; GCN-DAG: v_min_f32
; GCN-DAG: v_max_f32
; GCN: v_min_f32
; GCN: v_max_f32
define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Every buffer is indexed by the workitem id in x.
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %id.x
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %id.x
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %id.x
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %id.x
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr
  %lo = call float @llvm.minnum.f32(float %a, float %b)
  ; Extra use of the min: a volatile store to an undef address.
  store volatile float %lo, float addrspace(1)* undef
  %hi = call float @llvm.maxnum.f32(float %a, float %b)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c)
  %result = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %result, float addrspace(1)* %out.addr
  ret void
}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
717
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; pat0 shape where the inner maxnum(%a, %b) is additionally stored, giving it
; a second use. Only the function label is checked for this kernel.
; Attribute group #1 is declared outside this chunk.
; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use1:
define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Every buffer is indexed by the workitem id in x.
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %id.x
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %id.x
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %id.x
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %id.x
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr
  %lo = call float @llvm.minnum.f32(float %a, float %b)
  %hi = call float @llvm.maxnum.f32(float %a, float %b)
  ; Extra use of the max: a volatile store to an undef address.
  store volatile float %hi, float addrspace(1)* undef
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c)
  %result = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %result, float addrspace(1)* %out.addr
  ret void
}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
736
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; pat0 shape where minnum(maxnum(%a, %b), %c) is additionally stored, giving
; it a second use. Only the function label is checked for this kernel.
; Attribute group #1 is declared outside this chunk.
; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use2:
define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Every buffer is indexed by the workitem id in x.
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %id.x
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %id.x
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %id.x
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %id.x
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr
  %lo = call float @llvm.minnum.f32(float %a, float %b)
  %hi = call float @llvm.maxnum.f32(float %a, float %b)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c)
  ; Extra use of the capped max: a volatile store to an undef address.
  store volatile float %hi.capped, float addrspace(1)* undef
  %result = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %result, float addrspace(1)* %out.addr
  ret void
}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
755
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
756
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; Plain pat0 shape, maxnum(minnum(%a, %b), minnum(maxnum(%a, %b), %c)), built
; under attribute group #1 (declared outside this chunk).
; Only the function label is checked for this kernel.
; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0:
define amdgpu_kernel void @v_test_safe_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Every buffer is indexed by the workitem id in x.
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %id.x
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %id.x
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %id.x
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %id.x
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr
  %lo = call float @llvm.minnum.f32(float %a, float %b)
  %hi = call float @llvm.maxnum.f32(float %a, float %b)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c)
  %result = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %result, float addrspace(1)* %out.addr
  ret void
}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
774
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; pat0 shape over three fadd results where only the first fadd (on %a) lacks
; the nnan flag; the fadds on %b and %c carry it.
; Only the function label is checked for this kernel.
; GCN-LABEL: {{^}}v_nnan_inputs_missing0_med3_f32_pat0:
define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Every buffer is indexed by the workitem id in x.
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %id.x
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %id.x
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %id.x
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %id.x
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr

  ; Input 0 is produced without nnan; inputs 1 and 2 with nnan.
  %a.in = fadd float %a, 1.0
  %b.in = fadd nnan float %b, 2.0
  %c.in = fadd nnan float %c, 4.0

  %lo = call float @llvm.minnum.f32(float %a.in, float %b.in)
  %hi = call float @llvm.maxnum.f32(float %a.in, float %b.in)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c.in)
  %result = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %result, float addrspace(1)* %out.addr
  ret void
}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
797
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; pat0 shape over three fadd results where only the second fadd (on %b) lacks
; the nnan flag; the fadds on %a and %c carry it.
; Only the function label is checked for this kernel.
; GCN-LABEL: {{^}}v_nnan_inputs_missing1_med3_f32_pat0:
define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Every buffer is indexed by the workitem id in x.
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %id.x
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %id.x
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %id.x
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %id.x
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr

  ; Input 1 is produced without nnan; inputs 0 and 2 with nnan.
  %a.in = fadd nnan float %a, 1.0
  %b.in = fadd float %b, 2.0
  %c.in = fadd nnan float %c, 4.0

  %lo = call float @llvm.minnum.f32(float %a.in, float %b.in)
  %hi = call float @llvm.maxnum.f32(float %a.in, float %b.in)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c.in)
  %result = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %result, float addrspace(1)* %out.addr
  ret void
}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
820
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; pat0 shape over three fadd results where only the third fadd (on %c) lacks
; the nnan flag; the fadds on %a and %b carry it.
; Only the function label is checked for this kernel.
; GCN-LABEL: {{^}}v_nnan_inputs_missing2_med3_f32_pat0:
define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Every buffer is indexed by the workitem id in x.
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %id.x
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %id.x
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %id.x
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %id.x
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr

  ; Input 2 is produced without nnan; inputs 0 and 1 with nnan.
  %a.in = fadd nnan float %a, 1.0
  %b.in = fadd nnan float %b, 2.0
  %c.in = fadd float %c, 4.0

  %lo = call float @llvm.minnum.f32(float %a.in, float %b.in)
  %hi = call float @llvm.maxnum.f32(float %a.in, float %b.in)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c.in)
  %result = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %result, float addrspace(1)* %out.addr
  ret void
}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
843
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; pat0 shape in which the inner minnum sees the negated %a while the inner
; maxnum sees plain %a, so the two halves do not share the same first operand.
; The checks below expect two v_min_f32 and two v_max_f32, in any order.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
; GCN-DAG: v_min_f32
; GCN-DAG: v_max_f32
; GCN-DAG: v_min_f32
; GCN-DAG: v_max_f32
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  ; Every buffer is indexed by the workitem id in x.
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %id.x
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %id.x
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %id.x
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %id.x
  ; Volatile loads: the A/B/C captures above bind in this order.
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr
  ; Negation written as -0.0 - %a; only the minnum below consumes it.
  %a.negated = fsub float -0.0, %a
  %lo = call float @llvm.minnum.f32(float %a.negated, float %b)
  %hi = call float @llvm.maxnum.f32(float %a, float %b)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c)
  %result = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %result, float addrspace(1)* %out.addr
  ret void
}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
869
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
870 ; A simple min and max is not sufficient
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; This kernel computes only min(max(a, b), c) under attribute group #2
; ("no-nans-fp-math"="true"). The checks below expect three dword loads, then
; a v_max_f32 of the first two loaded values, then a v_min_f32 of that result
; with the third loaded value.
871 ; GCN-LABEL: {{^}}v_test_global_nnans_min_max_f32:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
872 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
873 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
874 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
875 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], [[A]], [[B]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
876 ; GCN: v_min_f32_e32 v{{[0-9]+}}, [[MAX]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
877 define amdgpu_kernel void @v_test_global_nnans_min_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
878 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
879 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
880 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
881 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
882 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; Volatile loads of the three inputs, in the order a, b, c.
883 %a = load volatile float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
884 %b = load volatile float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
885 %c = load volatile float, float addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
886 %max = call float @llvm.maxnum.f32(float %a, float %b)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
887 %minmax = call float @llvm.minnum.f32(float %max, float %c)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
888 store float %minmax, float addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
889 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
890 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
891
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; Half-precision register/immediate/immediate case: min(max(a + 1.0, 2.0), 4.0).
; The kernel uses attribute group #1 ("no-nans-fp-math"="false"); the only
; no-NaN information in the IR is the nnan flag on the fadd feeding the maxnum.
; Expected code according to the check lines below, by prefix
;   SI   - convert to f32, f32 add of 1.0, v_med3_f32 with 2.0 and 4.0,
;          convert back to f16
;   VI   - f16 add of 1.0 followed by separate f16 max (2.0) and min (4.0)
;   GFX9 - f16 add of 1.0 followed by v_med3_f16 of the add result, 2.0, 4.0
892 ; GCN-LABEL: {{^}}v_test_nnan_input_fmed3_r_i_i_f16:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
893 ; SI: v_cvt_f32_f16
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
894 ; SI: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
895 ; SI: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
896 ; SI: v_cvt_f16_f32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
897
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
898 ; VI: v_add_f16_e32 v{{[0-9]+}}, 1.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
899 ; VI: v_max_f16_e32 v{{[0-9]+}}, 2.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
900 ; VI: v_min_f16_e32 v{{[0-9]+}}, 4.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
901
134
3a76565eade5 update 5.0.1
mir3636
parents: 121
diff changeset
902 ; GFX9: v_add_f16_e32 [[ADD:v[0-9]+]], 1.0
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
903 ; GFX9: v_med3_f16 v{{[0-9]+}}, [[ADD]], 2.0, 4.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
904 define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #1 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
905 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
906 %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
907 %outgep = getelementptr half, half addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
908 %a = load half, half addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; The nnan flag on this fadd is the only no-NaN marker on the clamp input.
909 %a.add = fadd nnan half %a, 1.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
910 %max = call half @llvm.maxnum.f16(half %a.add, half 2.0)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
911 %med = call half @llvm.minnum.f16(half %max, half 4.0)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
912
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
913 store half %med, half addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
914 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
915 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
916
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; Half-precision version of the full med3 min/max tree
;   max(min(a', b'), min(max(a', b'), c'))
; where a' = a + 1.0, b' = b + 2.0, c' = c + 4.0 and each fadd carries nnan.
; The kernel uses attribute group #1 ("no-nans-fp-math"="false"), so the nnan
; flags on the fadds are the only no-NaN information in the IR.
; Expected code according to the check lines below, by prefix
;   SI    - f16 to f32 conversions, three f32 adds, v_med3_f32, then a
;           conversion back to f16
;   GFX89 - three f16 adds of 1.0, 2.0 and 4.0 to the loaded values
;   VI    - a sequence of f16 min/max instructions
;   GFX9  - one v_med3_f16 whose operands are the three add results
917 ; GCN-LABEL: {{^}}v_nnan_inputs_med3_f16_pat0:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
918 ; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
919 ; GCN: {{buffer|flat|global}}_load_ushort [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
920 ; GCN: {{buffer|flat|global}}_load_ushort [[C:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
921
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
922 ; SI: v_cvt_f32_f16
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
923 ; SI: v_cvt_f32_f16
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
924 ; SI: v_add_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
925 ; SI: v_add_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
926 ; SI: v_add_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
927 ; SI: v_med3_f32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
928 ; SI: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
929
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
930
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
931 ; GFX89-DAG: v_add_f16_e32 [[A_ADD:v[0-9]+]], 1.0, [[A]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
932 ; GFX89-DAG: v_add_f16_e32 [[B_ADD:v[0-9]+]], 2.0, [[B]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
933 ; GFX89-DAG: v_add_f16_e32 [[C_ADD:v[0-9]+]], 4.0, [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
934
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
935 ; VI-DAG: v_min_f16
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
936 ; VI-DAG: v_max_f16
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
937 ; VI: v_min_f16
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
938 ; VI: v_max_f16
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
939
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
940 ; GFX9: v_med3_f16 v{{[0-9]+}}, [[A_ADD]], [[B_ADD]], [[C_ADD]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
941 define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #1 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
942 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
943 %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
944 %gep1 = getelementptr half, half addrspace(1)* %bptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
945 %gep2 = getelementptr half, half addrspace(1)* %cptr, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
946 %outgep = getelementptr half, half addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; Volatile loads of the three inputs, in the order a, b, c.
947 %a = load volatile half, half addrspace(1)* %gep0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
948 %b = load volatile half, half addrspace(1)* %gep1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
949 %c = load volatile half, half addrspace(1)* %gep2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
950
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; Each med3 operand is produced by an fadd flagged nnan, with a distinct
; constant per operand (1.0, 2.0, 4.0).
951 %a.nnan = fadd nnan half %a, 1.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
952 %b.nnan = fadd nnan half %b, 2.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
953 %c.nnan = fadd nnan half %c, 4.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
954
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; max(min(a', b'), min(max(a', b'), c'))
955 %tmp0 = call half @llvm.minnum.f16(half %a.nnan, half %b.nnan)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
956 %tmp1 = call half @llvm.maxnum.f16(half %a.nnan, half %b.nnan)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
957 %tmp2 = call half @llvm.minnum.f16(half %tmp1, half %c.nnan)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
958 %med3 = call half @llvm.maxnum.f16(half %tmp0, half %tmp2)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
959 store half %med3, half addrspace(1)* %outgep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
960 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
961 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
962
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; Intrinsic declarations for the test kernels. Every declaration carries
; attribute group #0 (nounwind readnone).
; Work-item id within the workgroup (x dimension); the kernels use it as the
; element index into their input and output pointers.
963 declare i32 @llvm.amdgcn.workitem.id.x() #0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; f32 fabs / minnum / maxnum.
964 declare float @llvm.fabs.f32(float) #0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
965 declare float @llvm.minnum.f32(float, float) #0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
966 declare float @llvm.maxnum.f32(float, float) #0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; f64 minnum / maxnum.
967 declare double @llvm.minnum.f64(double, double) #0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
968 declare double @llvm.maxnum.f64(double, double) #0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
; f16 fabs / minnum / maxnum.
969 declare half @llvm.fabs.f16(half) #0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
970 declare half @llvm.minnum.f16(half, half) #0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
971 declare half @llvm.maxnum.f16(half, half) #0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
972
120
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
; Attribute groups referenced by the declarations and kernels in this file.
; #0 - attached to the intrinsic declarations.
973 attributes #0 = { nounwind readnone }
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
; #1 - kernels without the function-level no-NaNs assumption; any no-NaN
;      information has to come from flags on individual instructions.
974 attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
1172e4bd9c6f update 4.0.0
mir3636
parents:
diff changeset
; #2 - kernels with "no-nans-fp-math"="true" set for the whole function.
975 attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }