150
|
1 ;RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
|
221
|
2 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=FUNC %s
|
150
|
3 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
|
|
4
|
|
5 ; FUNC-LABEL: {{^}}test_select_v2i32:
|
|
6
|
|
7 ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Z
|
|
8 ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Y
|
|
9
|
221
|
10 ; VI: s_cmp_gt_i32
|
|
11 ; VI: s_cselect_b32
|
|
12 ; VI: s_cmp_gt_i32
|
|
13 ; VI: s_cselect_b32
|
|
14
|
236
|
15 ; SI-DAG: s_cmp_gt_i32
|
|
16 ; SI-DAG: s_cselect_b32
|
|
17 ; SI-DAG: s_cmp_gt_i32
|
|
18 ; SI-DAG: s_cselect_b32
|
150
|
19
|
252
|
; Kernel exercising a lane-wise select on <2 x i32>.
; One vector is loaded from each of %in0 and %in1 and the two are compared
; with signed greater-than. For every lane where the comparison holds the
; result takes the lane of %val, otherwise the lane loaded from %in0. The
; resulting vector is written to %out.
define amdgpu_kernel void @test_select_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1, <2 x i32> %val) {
entry:
  %lhs = load <2 x i32>, ptr addrspace(1) %in0
  %rhs = load <2 x i32>, ptr addrspace(1) %in1
  ; Per-lane mask: true where the %in0 element is greater than the %in1 element.
  %is.gt = icmp sgt <2 x i32> %lhs, %rhs
  %picked = select <2 x i1> %is.gt, <2 x i32> %val, <2 x i32> %lhs
  store <2 x i32> %picked, ptr addrspace(1) %out
  ret void
}
|
|
29
|
|
30 ; FUNC-LABEL: {{^}}test_select_v2f32:
|
|
31
|
|
32 ; EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
|
33 ; EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
|
34
|
|
35 ; SI: v_cmp_neq_f32_e32 vcc
|
|
36 ; SI: v_cndmask_b32_e32
|
|
37 ; SI: v_cmp_neq_f32_e32 vcc
|
|
38 ; SI: v_cndmask_b32_e32
|
|
39
|
252
|
; Kernel exercising a lane-wise select on <2 x float>.
; One vector is loaded from each of %in0 and %in1 and the two are compared
; with the "unordered or not equal" predicate. For every lane where the
; comparison holds the result takes the lane loaded from %in0, otherwise the
; lane loaded from %in1. The resulting vector is written to %out.
define amdgpu_kernel void @test_select_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
entry:
  %lhs = load <2 x float>, ptr addrspace(1) %in0
  %rhs = load <2 x float>, ptr addrspace(1) %in1
  ; Per-lane mask: true where the two elements differ or either one is NaN.
  %is.une = fcmp une <2 x float> %lhs, %rhs
  %picked = select <2 x i1> %is.une, <2 x float> %lhs, <2 x float> %rhs
  store <2 x float> %picked, ptr addrspace(1) %out
  ret void
}
|
|
49
|
|
50 ;FUNC-LABEL: {{^}}test_select_v4i32:
|
|
51
|
|
52 ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[4].X
|
|
53 ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].W
|
|
54 ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Z
|
|
55 ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Y
|
|
56
|
221
|
57 ; VI: s_cselect_b32
|
|
58 ; VI: s_cselect_b32
|
|
59 ; VI: s_cselect_b32
|
|
60 ; VI: s_cselect_b32
|
|
61
|
236
|
62 ; SI-DAG: s_cselect_b32
|
|
63 ; SI-DAG: s_cselect_b32
|
|
64 ; SI-DAG: s_cselect_b32
|
|
65 ; SI-DAG: s_cselect_b32
|
150
|
66
|
252
|
; Kernel exercising a lane-wise select on <4 x i32>.
; One vector is loaded from each of %in0 and %in1 and the two are compared
; with signed greater-than. For every lane where the comparison holds the
; result takes the lane of %val, otherwise the lane loaded from %in0. The
; resulting vector is written to %out.
define amdgpu_kernel void @test_select_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1, <4 x i32> %val) {
entry:
  %lhs = load <4 x i32>, ptr addrspace(1) %in0
  %rhs = load <4 x i32>, ptr addrspace(1) %in1
  ; Per-lane mask: true where the %in0 element is greater than the %in1 element.
  %is.gt = icmp sgt <4 x i32> %lhs, %rhs
  %picked = select <4 x i1> %is.gt, <4 x i32> %val, <4 x i32> %lhs
  store <4 x i32> %picked, ptr addrspace(1) %out
  ret void
}
|
|
76
|
|
77 ;FUNC-LABEL: {{^}}test_select_v4f32:
|
|
78 ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
|
79 ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
|
80 ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
|
81 ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
|
82
|
|
83 ; SI: v_cndmask_b32_e32
|
|
84 ; SI: v_cndmask_b32_e32
|
|
85 ; SI: v_cndmask_b32_e32
|
|
86 ; SI: v_cndmask_b32_e32
|
252
|
; Kernel exercising a lane-wise select on <4 x float>.
; One vector is loaded from each of %in0 and %in1 and the two are compared
; with the "unordered or not equal" predicate. For every lane where the
; comparison holds the result takes the lane loaded from %in0, otherwise the
; lane loaded from %in1. The resulting vector is written to %out.
define amdgpu_kernel void @test_select_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
entry:
  %lhs = load <4 x float>, ptr addrspace(1) %in0
  %rhs = load <4 x float>, ptr addrspace(1) %in1
  ; Per-lane mask: true where the two elements differ or either one is NaN.
  %is.une = fcmp une <4 x float> %lhs, %rhs
  %picked = select <4 x i1> %is.une, <4 x float> %lhs, <4 x float> %rhs
  store <4 x float> %picked, ptr addrspace(1) %out
  ret void
}
|