annotate test/CodeGen/AMDGPU/v_cndmask.ll @ 128:c347d3398279 default tip

fix
author mir3636
date Wed, 06 Dec 2017 14:37:17 +0900
parents 803732b1fca8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
4 declare i32 @llvm.amdgcn.workitem.id.x() #1
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
5
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
6 ; GCN-LABEL: {{^}}v_cnd_nan_nosgpr:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
7 ; GCN: v_cmp_eq_u32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
8 ; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, -1, v{{[0-9]+}}, [[COND]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
9 ; GCN-DAG: v{{[0-9]}}
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 ; All nan values are converted to 0xffffffff
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
11 ; GCN: s_endpgm
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
12 define amdgpu_kernel void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
13 %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 %f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
15 %f = load float, float addrspace(1)* %f.gep
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
16 %setcc = icmp ne i32 %c, 0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
17 %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
18 store float %select, float addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
19 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
20 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
21
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
22
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
23 ; This requires slightly trickier SGPR operand legalization since the
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
24 ; single constant bus SGPR usage is the last operand, and it should
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
25 ; never be moved.
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
26
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
27 ; GCN-LABEL: {{^}}v_cnd_nan:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
28 ; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
29 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, -1, v{{[0-9]+}}, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
30 ; GCN-DAG: v{{[0-9]}}
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
31 ; All nan values are converted to 0xffffffff
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
32 ; GCN: s_endpgm
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
33 define amdgpu_kernel void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
34 %setcc = icmp ne i32 %c, 0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
35 %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
36 store float %select, float addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
37 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
38 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
39
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
40 ; Test different compare and select operand types for optimal code
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
41 ; shrinking.
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
42 ; (select (cmp (sgprX, constant)), constant, sgprZ)
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
43
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
44 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_sgprZ_f32:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
45 ; GCN: s_load_dword [[X:s[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
46 ; GCN: s_load_dword [[Z:s[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
47 ; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
48 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
49 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
50 define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, float %x, float %z) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
51 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
52 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
53 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
54 %setcc = fcmp one float %x, 0.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
55 %select = select i1 %setcc, float 1.0, float %z
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
56 store float %select, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
57 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
58 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
59
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
60 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_sgprX_f32:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
61 ; GCN: s_load_dword [[X:s[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
62 ; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
63 ; GCN-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[X]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
64 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VX]], vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
65 define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(float addrspace(1)* %out, float %x) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
66 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
67 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
68 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
69 %setcc = fcmp one float %x, 0.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
70 %select = select i1 %setcc, float 1.0, float %x
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
71 store float %select, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
72 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
73 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
74
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
75 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprZ_f32:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
76 ; GCN: s_load_dword [[X:s[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
77 ; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
78 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], s{{[0-9]+}}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
79 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VZ]], vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
80 define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(float addrspace(1)* %out, float %x, float %z) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
81 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
82 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
83 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
84 %setcc = fcmp one float %x, 0.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
85 %select = select i1 %setcc, float 0.0, float %z
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
86 store float %select, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
87 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
88 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
89
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
90 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprX_f32:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
91 ; GCN: s_load_dword [[X:s[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
92 ; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
93 ; GCN-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[X]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
94 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VX]], vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
95 define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(float addrspace(1)* %out, float %x) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
96 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
97 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
98 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
99 %setcc = fcmp one float %x, 0.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
100 %select = select i1 %setcc, float 0.0, float %x
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
101 store float %select, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
102 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
103 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
104
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
105 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_vgprZ_f32:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
106 ; GCN-DAG: s_load_dword [[X:s[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
107 ; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
108 ; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
109 ; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 0, [[Z]], [[COND]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
110 define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
111 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
112 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
113 %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
114 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
115 %z = load float, float addrspace(1)* %z.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
116 %setcc = fcmp one float %x, 0.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
117 %select = select i1 %setcc, float 0.0, float %z
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
118 store float %select, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
119 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
120 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
122 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_vgprZ_f32:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
123 ; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
124 ; GCN-DAG: s_load_dword [[X:s[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
125 ; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
126 ; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 1.0, [[Z]], [[COND]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
127 define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
128 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
129 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
130 %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
131 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
132 %z = load float, float addrspace(1)* %z.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
133 %setcc = fcmp one float %x, 0.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
134 %select = select i1 %setcc, float 1.0, float %z
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
135 store float %select, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
136 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
137 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
138
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
139 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_sgprZ_f32:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
140 ; GCN-DAG: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
141 ; GCN-DAG: s_load_dword [[Z:s[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
142 ; GCN-DAG: v_cmp_ngt_f32_e32 vcc, 0, [[X]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
143 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
144 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
145 define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float %z) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
146 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
147 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
148 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
149 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
150 %x = load float, float addrspace(1)* %x.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
151 %setcc = fcmp olt float %x, 0.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
152 %select = select i1 %setcc, float 1.0, float %z
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
153 store float %select, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
154 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
155 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
156
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
157 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_vgprZ_f32:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
158 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
159 ; GCN: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
160 ; GCN: v_cmp_le_f32_e32 vcc, 0, [[X]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
161 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[Z]], vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
162 define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
163 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
164 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
165 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
166 %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
167 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
168 %x = load volatile float, float addrspace(1)* %x.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
169 %z = load volatile float, float addrspace(1)* %z.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
170 %setcc = fcmp ult float %x, 0.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
171 %select = select i1 %setcc, float 1.0, float %z
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
172 store float %select, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
173 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
174 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
175
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
176 ; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i32:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
177 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
178 ; GCN: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
179 ; GCN: v_cmp_lt_i32_e32 vcc, -1, [[X]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
180 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 2, [[Z]], vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
181 define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i32 addrspace(1)* %z.ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
182 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
183 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
184 %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
185 %z.gep = getelementptr inbounds i32, i32 addrspace(1)* %z.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
186 %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
187 %x = load volatile i32, i32 addrspace(1)* %x.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
188 %z = load volatile i32, i32 addrspace(1)* %z.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
189 %setcc = icmp slt i32 %x, 0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
190 %select = select i1 %setcc, i32 2, i32 %z
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
191 store i32 %select, i32 addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
192 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
193 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
194
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
195 ; FIXME: Why does VI make the wrong regalloc choice?
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
196 ; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i64:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
197 ; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[X_LO:[0-9]+]]:[[X_HI:[0-9]+]]{{\]}}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
198 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[Z_LO:[0-9]+]]:[[Z_HI:[0-9]+]]{{\]}}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
199 ; SI-DAG: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[X_LO]]:[[X_HI]]{{\]}}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
200 ; SI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v[[Z_HI]], vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
201 ; SI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2, v[[Z_LO]], vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
202
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
203 ; VI-DAG: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[X_LO]]:[[X_HI]]{{\]}}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
204 ; VI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v[[Z_HI]], vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
205 ; VI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2, v[[Z_LO]], vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
206 define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %x.ptr, i64 addrspace(1)* %z.ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
207 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
208 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
209 %x.gep = getelementptr inbounds i64, i64 addrspace(1)* %x.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
210 %z.gep = getelementptr inbounds i64, i64 addrspace(1)* %z.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
211 %out.gep = getelementptr inbounds i64, i64 addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
212 %x = load volatile i64, i64 addrspace(1)* %x.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
213 %z = load volatile i64, i64 addrspace(1)* %z.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
214 %setcc = icmp slt i64 %x, 0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
215 %select = select i1 %setcc, i64 2, i64 %z
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
216 store i64 %select, i64 addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
217 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
218 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
219
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
220 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_vgprZ_k1_v4f32:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
221 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
222 ; GCN: {{buffer|flat}}_load_dwordx4
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
223
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
224 ; GCN: v_cmp_nge_f32_e32 vcc, 4.0, [[X]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
225 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
226 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
227 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
228 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
229 define amdgpu_kernel void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
230 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
231 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
232 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
233 %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
234 %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
235 %x = load volatile float, float addrspace(1)* %x.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
236 %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
237 %setcc = fcmp ugt float %x, 4.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
238 %select = select i1 %setcc, <4 x float> %z, <4 x float> <float 1.0, float 2.0, float -0.5, float 4.0>
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
239 store <4 x float> %select, <4 x float> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
240 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
241 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
242
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
243 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_vgprZ_v4f32:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
244 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
245 ; GCN: {{buffer|flat}}_load_dwordx4
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
246
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
247 ; GCN: v_cmp_ge_f32_e32 vcc, 4.0, [[X]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
248 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
249 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
250 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
251 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
252 define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
253 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
254 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
255 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
256 %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
257 %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
258 %x = load volatile float, float addrspace(1)* %x.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
259 %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
260 %setcc = fcmp ugt float %x, 4.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
261 %select = select i1 %setcc, <4 x float> <float 1.0, float 2.0, float -0.5, float 4.0>, <4 x float> %z
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
262 store <4 x float> %select, <4 x float> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
263 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
264 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
265
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
266 ; This must be swapped as a vector type before the condition has
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
267 ; multiple uses.
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
268
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
269 ; GCN-LABEL: {{^}}fcmp_k0_vgprX_select_k1_vgprZ_v4f32:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
270 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
271 ; GCN: {{buffer|flat}}_load_dwordx4
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
272
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
273 ; GCN: v_cmp_le_f32_e32 vcc, 4.0, [[X]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
274 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
275 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
276 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
277 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
278 define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
279 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
280 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
281 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
282 %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
283 %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
284 %x = load volatile float, float addrspace(1)* %x.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
285 %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
286 %setcc = fcmp ugt float 4.0, %x
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
287 %select = select i1 %setcc, <4 x float> <float 1.0, float 2.0, float -0.5, float 4.0>, <4 x float> %z
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
288 store <4 x float> %select, <4 x float> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
289 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
290 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
291
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
292 ; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i1:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
293 ; GCN: load_dword
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
294 ; GCN: load_ubyte
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
295 ; GCN-DAG: v_cmp_gt_i32_e32 vcc, 0, v
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
296 ; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 1,
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
297 ; GCN-DAG: v_cmp_eq_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1, v
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
298 ; GCN-DAG: s_or_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc, s{{\[[0-9]+:[0-9]+\]}}
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
299 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
300 ; GCN: store_byte
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
301 define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i1 addrspace(1)* %z.ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
302 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
303 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
304 %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
305 %z.gep = getelementptr inbounds i1, i1 addrspace(1)* %z.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
306 %out.gep = getelementptr inbounds i1, i1 addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
307 %x = load volatile i32, i32 addrspace(1)* %x.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
308 %z = load volatile i1, i1 addrspace(1)* %z.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
309 %setcc = icmp slt i32 %x, 0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
310 %select = select i1 %setcc, i1 true, i1 %z
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
311 store i1 %select, i1 addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
312 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
313 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
314
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
315 ; Different types compared vs. selected
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
316 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_selectf64_k1_vgprZ_f32:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
317 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
318 ; GCN: {{buffer|flat}}_load_dwordx2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
319
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
320 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3ff00000
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
321 ; GCN: v_cmp_le_f32_e32 vcc, 0, [[X]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
322 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
323 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
324 define amdgpu_kernel void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(double addrspace(1)* %out, float addrspace(1)* %x.ptr, double addrspace(1)* %z.ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
325 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
326 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
327 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
328 %z.gep = getelementptr inbounds double, double addrspace(1)* %z.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
329 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
330 %x = load volatile float, float addrspace(1)* %x.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
331 %z = load volatile double, double addrspace(1)* %z.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
332 %setcc = fcmp ult float %x, 0.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
333 %select = select i1 %setcc, double 1.0, double %z
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
334 store double %select, double addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
335 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
336 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
337
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
338 ; Different types compared vs. selected
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
339 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_selecti64_k1_vgprZ_f32:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
340 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
341 ; GCN: {{buffer|flat}}_load_dwordx2
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
342
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
343 ; GCN: v_cmp_nlg_f32_e32 vcc, 0, [[X]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
344 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 3, v{{[0-9]+}}, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
345 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
346 define amdgpu_kernel void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(i64 addrspace(1)* %out, float addrspace(1)* %x.ptr, i64 addrspace(1)* %z.ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
347 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
348 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
349 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
350 %z.gep = getelementptr inbounds i64, i64 addrspace(1)* %z.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
351 %out.gep = getelementptr inbounds i64, i64 addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
352 %x = load volatile float, float addrspace(1)* %x.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
353 %z = load volatile i64, i64 addrspace(1)* %z.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
354 %setcc = fcmp one float %x, 0.0
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
355 %select = select i1 %setcc, i64 3, i64 %z
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
356 store i64 %select, i64 addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
357 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
358 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
359
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
360 ; Different types compared vs. selected
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
361 ; GCN-LABEL: {{^}}icmp_vgprX_k0_selectf32_k1_vgprZ_i32:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
362 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
363 ; GCN: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
364
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
365 ; GCN: v_cmp_gt_u32_e32 vcc, 2, [[X]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
366 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, [[Z]], vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
367 define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(float addrspace(1)* %out, i32 addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
368 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
369 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
370 %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
371 %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
372 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
373 %x = load volatile i32, i32 addrspace(1)* %x.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
374 %z = load volatile float, float addrspace(1)* %z.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
375 %setcc = icmp ugt i32 %x, 1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
376 %select = select i1 %setcc, float 4.0, float %z
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
377 store float %select, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
378 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
379 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
380
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
381 ; FIXME: Should be able to handle multiple uses
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
382
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
383 ; GCN-LABEL: {{^}}fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2:
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
384 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
385
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
386 ; GCN: v_cmp_nle_f32_e32 vcc, 4.0, [[X]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
387 ; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -1.0, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
388 ; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -2.0, vcc
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
389 define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 {
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
390 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
391 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
392 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
393 %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
394 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
395 %x = load volatile float, float addrspace(1)* %x.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
396 %z = load volatile float, float addrspace(1)* %z.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
397 %setcc = fcmp ugt float 4.0, %x
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
398 %select0 = select i1 %setcc, float -1.0, float %z
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
399 %select1 = select i1 %setcc, float -2.0, float %z
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
400 store volatile float %select0, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
401 store volatile float %select1, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
402 ret void
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
403 }
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
404
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
405 attributes #0 = { nounwind }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
406 attributes #1 = { nounwind readnone }