150
|
1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
|
2
|
|
3 ; Make sure to test with f32 and i32 compares. If we have to use float
|
|
4 ; compares, we always have multiple condition registers. If we can do
|
|
5 ; scalar compares, we don't want to use multiple condition registers.
|
|
6
|
|
7 ; GCN-LABEL: {{^}}opt_select_i32_and_cmp_i32:
|
|
8 ; GCN-DAG: v_cmp_ne_u32_e32 vcc,
|
|
9 ; GCN-DAG: v_cmp_ne_u32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
|
|
10 ; GCN: s_and_b64 vcc, vcc, [[CMP1]]
|
|
11 ; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
|
|
12 ; GCN-NOT: [[RESULT]]
|
|
13 ; GCN: buffer_store_dword [[RESULT]]
|
|
; Kernel under test: stores %x to %out when %a differs from both %b and %c,
; otherwise stores %y. Both conditions are integer compares and the selected
; value is a single i32.
define amdgpu_kernel void @opt_select_i32_and_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
  %a_ne_b = icmp ne i32 %a, %b
  %a_ne_c = icmp ne i32 %a, %c
  ; The AND of the two i1 compare results is the select condition.
  %both = and i1 %a_ne_b, %a_ne_c
  %picked = select i1 %both, i32 %x, i32 %y
  store i32 %picked, i32 addrspace(1)* %out
  ret void
}
|
|
22
|
|
23 ; GCN-LABEL: {{^}}opt_select_i32_and_cmp_f32:
|
|
24 ; GCN-DAG: v_cmp_lg_f32_e32 vcc
|
|
25 ; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
|
|
26 ; GCN: s_and_b64 vcc, vcc, [[CMP1]]
|
|
27 ; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
|
|
28 ; GCN-NOT: [[RESULT]]
|
|
29 ; GCN: buffer_store_dword [[RESULT]]
|
|
; Kernel under test: stores %x to %out when %a is ordered-and-not-equal to
; both %b and %c, otherwise stores %y. The conditions are float compares and
; the selected value is a single i32.
define amdgpu_kernel void @opt_select_i32_and_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
  ; "one" = ordered and not equal.
  %a_one_b = fcmp one float %a, %b
  %a_one_c = fcmp one float %a, %c
  ; The AND of the two i1 compare results is the select condition.
  %both = and i1 %a_one_b, %a_one_c
  %picked = select i1 %both, i32 %x, i32 %y
  store i32 %picked, i32 addrspace(1)* %out
  ret void
}
|
|
38
|
|
39 ; GCN-LABEL: {{^}}opt_select_i64_and_cmp_i32:
|
|
40 ; GCN-DAG: v_cmp_ne_u32_e32 vcc,
|
|
41 ; GCN-DAG: v_cmp_ne_u32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
|
|
42 ; GCN: s_and_b64 vcc, vcc, [[CMP1]]
|
|
43 ; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
|
|
44 ; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
|
|
45 ; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
|
|
; Kernel under test: stores the i64 %x to %out when %a differs from both %b
; and %c, otherwise stores the i64 %y. Both conditions are integer compares.
define amdgpu_kernel void @opt_select_i64_and_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
  %a_ne_b = icmp ne i32 %a, %b
  %a_ne_c = icmp ne i32 %a, %c
  ; One combined i1 condition drives the 64-bit select.
  %both = and i1 %a_ne_b, %a_ne_c
  %picked = select i1 %both, i64 %x, i64 %y
  store i64 %picked, i64 addrspace(1)* %out
  ret void
}
|
|
54
|
|
55 ; GCN-LABEL: {{^}}opt_select_i64_and_cmp_f32:
|
|
56 ; GCN-DAG: v_cmp_lg_f32_e32 vcc,
|
|
57 ; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
|
|
58 ; GCN: s_and_b64 vcc, vcc, [[CMP1]]
|
|
59 ; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
|
|
60 ; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
|
|
61 ; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
|
|
; Kernel under test: stores the i64 %x to %out when %a is
; ordered-and-not-equal to both %b and %c, otherwise stores the i64 %y.
; The conditions are float compares.
define amdgpu_kernel void @opt_select_i64_and_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
  ; "one" = ordered and not equal.
  %a_one_b = fcmp one float %a, %b
  %a_one_c = fcmp one float %a, %c
  ; One combined i1 condition drives the 64-bit select.
  %both = and i1 %a_one_b, %a_one_c
  %picked = select i1 %both, i64 %x, i64 %y
  store i64 %picked, i64 addrspace(1)* %out
  ret void
}
|
|
70
|
|
71 ; GCN-LABEL: {{^}}opt_select_i32_or_cmp_i32:
|
|
72 ; GCN-DAG: v_cmp_ne_u32_e32 vcc,
|
|
73 ; GCN-DAG: v_cmp_ne_u32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
|
|
74 ; GCN: s_or_b64 vcc, vcc, [[CMP1]]
|
|
75 ; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
|
|
76 ; GCN-NOT: [[RESULT]]
|
|
77 ; GCN: buffer_store_dword [[RESULT]]
|
|
78 ; GCN: s_endpgm
|
|
; Kernel under test: stores %x to %out when %a differs from %b or from %c
; (or both), otherwise stores %y. Both conditions are integer compares and
; the selected value is a single i32.
define amdgpu_kernel void @opt_select_i32_or_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
  %a_ne_b = icmp ne i32 %a, %b
  %a_ne_c = icmp ne i32 %a, %c
  ; The OR of the two i1 compare results is the select condition.
  %either = or i1 %a_ne_b, %a_ne_c
  %picked = select i1 %either, i32 %x, i32 %y
  store i32 %picked, i32 addrspace(1)* %out
  ret void
}
|
|
87
|
|
88 ; GCN-LABEL: {{^}}opt_select_i32_or_cmp_f32:
|
|
89 ; GCN-DAG: v_cmp_lg_f32_e32 vcc
|
|
90 ; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
|
|
91 ; GCN: s_or_b64 vcc, vcc, [[CMP1]]
|
|
92 ; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
|
|
93 ; GCN-NOT: [[RESULT]]
|
|
94 ; GCN: buffer_store_dword [[RESULT]]
|
|
; Kernel under test: stores %x to %out when %a is ordered-and-not-equal to
; %b or to %c (or both), otherwise stores %y. The conditions are float
; compares and the selected value is a single i32.
define amdgpu_kernel void @opt_select_i32_or_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
  ; "one" = ordered and not equal.
  %a_one_b = fcmp one float %a, %b
  %a_one_c = fcmp one float %a, %c
  ; The OR of the two i1 compare results is the select condition.
  %either = or i1 %a_one_b, %a_one_c
  %picked = select i1 %either, i32 %x, i32 %y
  store i32 %picked, i32 addrspace(1)* %out
  ret void
}
|
|
103
|
|
104 ; GCN-LABEL: {{^}}opt_select_i64_or_cmp_i32:
|
|
105 ; GCN-DAG: v_cmp_ne_u32_e32 vcc,
|
|
106 ; GCN-DAG: v_cmp_ne_u32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
|
|
107 ; GCN: s_or_b64 vcc, vcc, [[CMP1]]
|
|
108 ; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
|
|
109 ; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
|
|
110 ; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
|
|
; Kernel under test: stores the i64 %x to %out when %a differs from %b or
; from %c (or both), otherwise stores the i64 %y. Both conditions are
; integer compares.
define amdgpu_kernel void @opt_select_i64_or_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
  %a_ne_b = icmp ne i32 %a, %b
  %a_ne_c = icmp ne i32 %a, %c
  ; One combined i1 condition drives the 64-bit select.
  %either = or i1 %a_ne_b, %a_ne_c
  %picked = select i1 %either, i64 %x, i64 %y
  store i64 %picked, i64 addrspace(1)* %out
  ret void
}
|
|
119
|
|
120 ; GCN-LABEL: {{^}}opt_select_i64_or_cmp_f32:
|
|
121 ; GCN-DAG: v_cmp_lg_f32_e32 vcc,
|
|
122 ; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
|
|
123 ; GCN: s_or_b64 vcc, vcc, [[CMP1]]
|
|
124 ; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
|
|
125 ; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
|
|
126 ; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
|
|
; Kernel under test: stores the i64 %x to %out when %a is
; ordered-and-not-equal to %b or to %c (or both), otherwise stores the
; i64 %y. The conditions are float compares.
define amdgpu_kernel void @opt_select_i64_or_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
  ; "one" = ordered and not equal.
  %a_one_b = fcmp one float %a, %b
  %a_one_c = fcmp one float %a, %c
  ; One combined i1 condition drives the 64-bit select.
  %either = or i1 %a_one_b, %a_one_c
  %picked = select i1 %either, i64 %x, i64 %y
  store i64 %picked, i64 addrspace(1)* %out
  ret void
}
|
|
135
|
|
136 ; GCN-LABEL: {{^}}regression:
|
|
137 ; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 1.0
|
|
138 ; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
|
|
139 ; GCN: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
|
|
140
|
|
; Branching kernel: stores 4.0 to %out when the i1 phi in endif is true, and
; 0.0 otherwise. The phi is true when entry branches straight to endif
; (%cmp0 false), false when if0 branches to endif (%cmp1 false), and takes
; the value of %cmp2 when control arrives from if1.
141 define amdgpu_kernel void @regression(float addrspace(1)* %out, float %c0, float %c1) #0 {
|
|
142 entry:
|
|
; Ordered-equal compare of %c0 against 1.0; a false result skips to endif.
143 %cmp0 = fcmp oeq float %c0, 1.0
|
|
144 br i1 %cmp0, label %if0, label %endif
|
|
145
|
|
146 if0:
|
|
; Ordered-equal compare of %c1 against 0.0; a false result skips to endif.
147 %cmp1 = fcmp oeq float %c1, 0.0
|
|
148 br i1 %cmp1, label %if1, label %endif
|
|
149
|
|
150 if1:
|
|
; Logical NOT of %cmp1; if1 is only entered on the true edge of %cmp1.
151 %cmp2 = xor i1 %cmp1, true
|
|
152 br label %endif
|
|
153
|
|
154 endif:
|
|
; Merge the three incoming i1 values, one per predecessor block.
155 %tmp0 = phi i1 [ true, %entry ], [ %cmp2, %if1 ], [ false, %if0 ]
|
|
156 %tmp2 = select i1 %tmp0, float 4.0, float 0.0
|
|
157 store float %tmp2, float addrspace(1)* %out
|
|
158 ret void
|
|
159 }
|
|
160
|
|
; Attribute group #0 (nounwind), referenced by every kernel definition shown above.
161 attributes #0 = { nounwind }
|