;RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=FUNC %s
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
|
|
4
|
|
; FUNC-LABEL: {{^}}test_select_v2i32:

; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Z
; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Y

; VI: s_cmp_gt_i32
; VI: s_cselect_b32
; VI: s_cmp_gt_i32
; VI: s_cselect_b32

; SI: v_cmp_gt_i32_e32 vcc
; SI: v_cndmask_b32_e32
; SI: v_cmp_gt_i32_e32 vcc
; SI: v_cndmask_b32_e32

; Integer vector select, two lanes. Both operands are loaded through the
; addrspace(1) pointers %in0 and %in1 and compared lane-by-lane with a signed
; greater-than. Lanes where the compare holds take the matching lane of the
; %val argument; every other lane keeps the value loaded from %in0. The merged
; vector is stored through %out.
define amdgpu_kernel void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1, <2 x i32> %val) {
entry:
  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in0
  %b = load <2 x i32>, <2 x i32> addrspace(1)* %in1
  ; Per-lane mask: true where %a is (signed) greater than %b.
  %a_gt_b = icmp sgt <2 x i32> %a, %b
  ; True lanes come from the kernel argument, false lanes from the first load.
  %sel = select <2 x i1> %a_gt_b, <2 x i32> %val, <2 x i32> %a
  store <2 x i32> %sel, <2 x i32> addrspace(1)* %out
  ret void
}
|
|
29
|
|
; FUNC-LABEL: {{^}}test_select_v2f32:

; EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

; SI: v_cmp_neq_f32_e32 vcc
; SI: v_cndmask_b32_e32
; SI: v_cmp_neq_f32_e32 vcc
; SI: v_cndmask_b32_e32

; Float vector select, two lanes. Both operands are loaded through the
; addrspace(1) pointers %in0 and %in1 and compared lane-by-lane with
; "unordered or not equal". Lanes where the compare holds keep the value from
; %in0; every other lane takes the value from %in1. The merged vector is
; stored through %out.
define amdgpu_kernel void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in0, <2 x float> addrspace(1)* %in1) {
entry:
  %a = load <2 x float>, <2 x float> addrspace(1)* %in0
  %b = load <2 x float>, <2 x float> addrspace(1)* %in1
  ; Per-lane mask: true where the lanes differ or either one is unordered.
  %differs = fcmp une <2 x float> %a, %b
  %sel = select <2 x i1> %differs, <2 x float> %a, <2 x float> %b
  store <2 x float> %sel, <2 x float> addrspace(1)* %out
  ret void
}
|
|
49
|
|
;FUNC-LABEL: {{^}}test_select_v4i32:

; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[4].X
; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].W
; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Z
; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Y

; VI: s_cselect_b32
; VI: s_cselect_b32
; VI: s_cselect_b32
; VI: s_cselect_b32

; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32

; Integer vector select, four lanes. Both operands are loaded through the
; addrspace(1) pointers %in0 and %in1 and compared lane-by-lane with a signed
; greater-than. Lanes where the compare holds take the matching lane of the
; %val argument; every other lane keeps the value loaded from %in0. The merged
; vector is stored through %out.
define amdgpu_kernel void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1, <4 x i32> %val) {
entry:
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in0
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %in1
  ; Per-lane mask: true where %a is (signed) greater than %b.
  %a_gt_b = icmp sgt <4 x i32> %a, %b
  ; True lanes come from the kernel argument, false lanes from the first load.
  %sel = select <4 x i1> %a_gt_b, <4 x i32> %val, <4 x i32> %a
  store <4 x i32> %sel, <4 x i32> addrspace(1)* %out
  ret void
}
|
|
76
|
|
;FUNC-LABEL: {{^}}test_select_v4f32:
;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32

; Float vector select, four lanes. Both operands are loaded through the
; addrspace(1) pointers %in0 and %in1 and compared lane-by-lane with
; "unordered or not equal". Lanes where the compare holds keep the value from
; %in0; every other lane takes the value from %in1. The merged vector is
; stored through %out.
define amdgpu_kernel void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in0, <4 x float> addrspace(1)* %in1) {
entry:
  %a = load <4 x float>, <4 x float> addrspace(1)* %in0
  %b = load <4 x float>, <4 x float> addrspace(1)* %in1
  ; Per-lane mask: true where the lanes differ or either one is unordered.
  %differs = fcmp une <4 x float> %a, %b
  %sel = select <4 x i1> %differs, <4 x float> %a, <4 x float> %b
  store <4 x float> %sel, <4 x float> addrspace(1)* %out
  ret void
}
|