annotate llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll @ 206:f17a3b42b08b

Added tag before-12 for changeset b7591485f4cd
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 07 Jun 2021 21:25:57 +0900
parents 0572611fdcc8
children 2e18cbf3894f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
150
anatofuz
parents:
diff changeset
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
anatofuz
parents:
diff changeset
2 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -enable-var-scope -check-prefixes=GCN,SI
anatofuz
parents:
diff changeset
3 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s -enable-var-scope -check-prefixes=GCN,GFX89,VI
anatofuz
parents:
diff changeset
4 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -enable-var-scope -check-prefixes=GCN,GFX89,GFX9
anatofuz
parents:
diff changeset
5
anatofuz
parents:
diff changeset
6 define amdgpu_kernel void @s_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float %x, float %y) #0 {
anatofuz
parents:
diff changeset
7 ; SI-LABEL: s_cvt_pkrtz_v2f16_f32:
anatofuz
parents:
diff changeset
8 ; SI: ; %bb.0:
anatofuz
parents:
diff changeset
9 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb
anatofuz
parents:
diff changeset
10 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
anatofuz
parents:
diff changeset
11 ; SI-NEXT: s_mov_b32 s7, 0xf000
anatofuz
parents:
diff changeset
12 ; SI-NEXT: s_mov_b32 s6, -1
anatofuz
parents:
diff changeset
13 ; SI-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
14 ; SI-NEXT: v_mov_b32_e32 v0, s3
anatofuz
parents:
diff changeset
15 ; SI-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, s2, v0
anatofuz
parents:
diff changeset
16 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
anatofuz
parents:
diff changeset
17 ; SI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
18 ;
anatofuz
parents:
diff changeset
19 ; VI-LABEL: s_cvt_pkrtz_v2f16_f32:
anatofuz
parents:
diff changeset
20 ; VI: ; %bb.0:
anatofuz
parents:
diff changeset
21 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
anatofuz
parents:
diff changeset
22 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
anatofuz
parents:
diff changeset
23 ; VI-NEXT: s_waitcnt lgkmcnt(0)
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
24 ; VI-NEXT: v_mov_b32_e32 v0, s1
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
25 ; VI-NEXT: v_cvt_pkrtz_f16_f32 v2, s0, v0
150
anatofuz
parents:
diff changeset
26 ; VI-NEXT: v_mov_b32_e32 v0, s2
anatofuz
parents:
diff changeset
27 ; VI-NEXT: v_mov_b32_e32 v1, s3
anatofuz
parents:
diff changeset
28 ; VI-NEXT: flat_store_dword v[0:1], v2
anatofuz
parents:
diff changeset
29 ; VI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
30 ;
anatofuz
parents:
diff changeset
31 ; GFX9-LABEL: s_cvt_pkrtz_v2f16_f32:
anatofuz
parents:
diff changeset
32 ; GFX9: ; %bb.0:
anatofuz
parents:
diff changeset
33 ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
anatofuz
parents:
diff changeset
34 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
anatofuz
parents:
diff changeset
35 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
36 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
37 ; GFX9-NEXT: v_cvt_pkrtz_f16_f32 v2, s0, v0
150
anatofuz
parents:
diff changeset
38 ; GFX9-NEXT: v_mov_b32_e32 v0, s2
anatofuz
parents:
diff changeset
39 ; GFX9-NEXT: v_mov_b32_e32 v1, s3
anatofuz
parents:
diff changeset
40 ; GFX9-NEXT: global_store_dword v[0:1], v2, off
anatofuz
parents:
diff changeset
41 ; GFX9-NEXT: s_endpgm
anatofuz
parents:
diff changeset
42 %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
anatofuz
parents:
diff changeset
43 store <2 x half> %result, <2 x half> addrspace(1)* %out
anatofuz
parents:
diff changeset
44 ret void
anatofuz
parents:
diff changeset
45 }
anatofuz
parents:
diff changeset
46
anatofuz
parents:
diff changeset
47 define amdgpu_kernel void @s_cvt_pkrtz_samereg_v2f16_f32(<2 x half> addrspace(1)* %out, float %x) #0 {
anatofuz
parents:
diff changeset
48 ; SI-LABEL: s_cvt_pkrtz_samereg_v2f16_f32:
anatofuz
parents:
diff changeset
49 ; SI: ; %bb.0:
anatofuz
parents:
diff changeset
50 ; SI-NEXT: s_load_dword s2, s[0:1], 0xb
anatofuz
parents:
diff changeset
51 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
anatofuz
parents:
diff changeset
52 ; SI-NEXT: s_mov_b32 s7, 0xf000
anatofuz
parents:
diff changeset
53 ; SI-NEXT: s_mov_b32 s6, -1
anatofuz
parents:
diff changeset
54 ; SI-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
55 ; SI-NEXT: v_cvt_pkrtz_f16_f32_e64 v0, s2, s2
anatofuz
parents:
diff changeset
56 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
anatofuz
parents:
diff changeset
57 ; SI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
58 ;
anatofuz
parents:
diff changeset
59 ; VI-LABEL: s_cvt_pkrtz_samereg_v2f16_f32:
anatofuz
parents:
diff changeset
60 ; VI: ; %bb.0:
anatofuz
parents:
diff changeset
61 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
anatofuz
parents:
diff changeset
62 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
anatofuz
parents:
diff changeset
63 ; VI-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
64 ; VI-NEXT: v_mov_b32_e32 v0, s2
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
65 ; VI-NEXT: v_cvt_pkrtz_f16_f32 v2, s0, s0
150
anatofuz
parents:
diff changeset
66 ; VI-NEXT: v_mov_b32_e32 v1, s3
anatofuz
parents:
diff changeset
67 ; VI-NEXT: flat_store_dword v[0:1], v2
anatofuz
parents:
diff changeset
68 ; VI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
69 ;
anatofuz
parents:
diff changeset
70 ; GFX9-LABEL: s_cvt_pkrtz_samereg_v2f16_f32:
anatofuz
parents:
diff changeset
71 ; GFX9: ; %bb.0:
anatofuz
parents:
diff changeset
72 ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
anatofuz
parents:
diff changeset
73 ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x2c
anatofuz
parents:
diff changeset
74 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
75 ; GFX9-NEXT: v_mov_b32_e32 v0, s2
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
76 ; GFX9-NEXT: v_cvt_pkrtz_f16_f32 v2, s0, s0
150
anatofuz
parents:
diff changeset
77 ; GFX9-NEXT: v_mov_b32_e32 v1, s3
anatofuz
parents:
diff changeset
78 ; GFX9-NEXT: global_store_dword v[0:1], v2, off
anatofuz
parents:
diff changeset
79 ; GFX9-NEXT: s_endpgm
anatofuz
parents:
diff changeset
80 %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %x)
anatofuz
parents:
diff changeset
81 store <2 x half> %result, <2 x half> addrspace(1)* %out
anatofuz
parents:
diff changeset
82 ret void
anatofuz
parents:
diff changeset
83 }
anatofuz
parents:
diff changeset
84
anatofuz
parents:
diff changeset
85 define amdgpu_kernel void @s_cvt_pkrtz_undef_undef(<2 x half> addrspace(1)* %out) #0 {
anatofuz
parents:
diff changeset
86 ; GCN-LABEL: s_cvt_pkrtz_undef_undef:
anatofuz
parents:
diff changeset
87 ; GCN: ; %bb.0:
anatofuz
parents:
diff changeset
88 ; GCN-NEXT: s_endpgm
anatofuz
parents:
diff changeset
89 %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float undef)
anatofuz
parents:
diff changeset
90 store <2 x half> %result, <2 x half> addrspace(1)* %out
anatofuz
parents:
diff changeset
91 ret void
anatofuz
parents:
diff changeset
92 }
anatofuz
parents:
diff changeset
93
anatofuz
parents:
diff changeset
94 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
anatofuz
parents:
diff changeset
95 ; SI-LABEL: v_cvt_pkrtz_v2f16_f32:
anatofuz
parents:
diff changeset
96 ; SI: ; %bb.0:
anatofuz
parents:
diff changeset
97 ; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
anatofuz
parents:
diff changeset
98 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
anatofuz
parents:
diff changeset
99 ; SI-NEXT: s_mov_b32 s3, 0xf000
anatofuz
parents:
diff changeset
100 ; SI-NEXT: s_mov_b32 s2, 0
anatofuz
parents:
diff changeset
101 ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
anatofuz
parents:
diff changeset
102 ; SI-NEXT: v_mov_b32_e32 v1, 0
anatofuz
parents:
diff changeset
103 ; SI-NEXT: s_mov_b64 s[6:7], s[2:3]
anatofuz
parents:
diff changeset
104 ; SI-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
105 ; SI-NEXT: s_mov_b64 s[0:1], s[10:11]
anatofuz
parents:
diff changeset
106 ; SI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
anatofuz
parents:
diff changeset
107 ; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
anatofuz
parents:
diff changeset
108 ; SI-NEXT: s_mov_b64 s[10:11], s[2:3]
anatofuz
parents:
diff changeset
109 ; SI-NEXT: s_waitcnt vmcnt(0)
anatofuz
parents:
diff changeset
110 ; SI-NEXT: v_cvt_pkrtz_f16_f32_e32 v2, v2, v3
anatofuz
parents:
diff changeset
111 ; SI-NEXT: buffer_store_dword v2, v[0:1], s[8:11], 0 addr64
anatofuz
parents:
diff changeset
112 ; SI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
113 ;
anatofuz
parents:
diff changeset
114 ; VI-LABEL: v_cvt_pkrtz_v2f16_f32:
anatofuz
parents:
diff changeset
115 ; VI: ; %bb.0:
anatofuz
parents:
diff changeset
116 ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
anatofuz
parents:
diff changeset
117 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
anatofuz
parents:
diff changeset
118 ; VI-NEXT: v_lshlrev_b32_e32 v4, 2, v0
anatofuz
parents:
diff changeset
119 ; VI-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
120 ; VI-NEXT: v_mov_b32_e32 v1, s7
anatofuz
parents:
diff changeset
121 ; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v4
anatofuz
parents:
diff changeset
122 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
anatofuz
parents:
diff changeset
123 ; VI-NEXT: v_mov_b32_e32 v3, s1
anatofuz
parents:
diff changeset
124 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v4
anatofuz
parents:
diff changeset
125 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
126 ; VI-NEXT: flat_load_dword v0, v[0:1]
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
127 ; VI-NEXT: flat_load_dword v1, v[2:3]
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
128 ; VI-NEXT: v_mov_b32_e32 v5, s5
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
129 ; VI-NEXT: v_add_u32_e32 v4, vcc, s4, v4
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
130 ; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
150
anatofuz
parents:
diff changeset
131 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
132 ; VI-NEXT: v_cvt_pkrtz_f16_f32 v0, v0, v1
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
133 ; VI-NEXT: flat_store_dword v[4:5], v0
150
anatofuz
parents:
diff changeset
134 ; VI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
135 ;
anatofuz
parents:
diff changeset
136 ; GFX9-LABEL: v_cvt_pkrtz_v2f16_f32:
anatofuz
parents:
diff changeset
137 ; GFX9: ; %bb.0:
anatofuz
parents:
diff changeset
138 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
anatofuz
parents:
diff changeset
139 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
anatofuz
parents:
diff changeset
140 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v0
anatofuz
parents:
diff changeset
141 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
142 ; GFX9-NEXT: v_mov_b32_e32 v1, s7
anatofuz
parents:
diff changeset
143 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s6, v4
anatofuz
parents:
diff changeset
144 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
anatofuz
parents:
diff changeset
145 ; GFX9-NEXT: v_mov_b32_e32 v3, s1
anatofuz
parents:
diff changeset
146 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v4
anatofuz
parents:
diff changeset
147 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
148 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
149 ; GFX9-NEXT: global_load_dword v1, v[2:3], off
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
150 ; GFX9-NEXT: v_mov_b32_e32 v5, s5
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
151 ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, s4, v4
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
152 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
150
anatofuz
parents:
diff changeset
153 ; GFX9-NEXT: s_waitcnt vmcnt(0)
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
154 ; GFX9-NEXT: v_cvt_pkrtz_f16_f32 v0, v0, v1
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
155 ; GFX9-NEXT: global_store_dword v[4:5], v0, off
150
anatofuz
parents:
diff changeset
156 ; GFX9-NEXT: s_endpgm
anatofuz
parents:
diff changeset
157 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
158 %tid.ext = sext i32 %tid to i64
anatofuz
parents:
diff changeset
159 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
anatofuz
parents:
diff changeset
160 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
anatofuz
parents:
diff changeset
161 %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
anatofuz
parents:
diff changeset
162 %a = load volatile float, float addrspace(1)* %a.gep
anatofuz
parents:
diff changeset
163 %b = load volatile float, float addrspace(1)* %b.gep
anatofuz
parents:
diff changeset
164 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float %b)
anatofuz
parents:
diff changeset
165 store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
166 ret void
anatofuz
parents:
diff changeset
167 }
anatofuz
parents:
diff changeset
168
anatofuz
parents:
diff changeset
169 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_reg_imm(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
anatofuz
parents:
diff changeset
170 ; SI-LABEL: v_cvt_pkrtz_v2f16_f32_reg_imm:
anatofuz
parents:
diff changeset
171 ; SI: ; %bb.0:
anatofuz
parents:
diff changeset
172 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
anatofuz
parents:
diff changeset
173 ; SI-NEXT: s_mov_b32 s7, 0xf000
anatofuz
parents:
diff changeset
174 ; SI-NEXT: s_mov_b32 s6, 0
anatofuz
parents:
diff changeset
175 ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
anatofuz
parents:
diff changeset
176 ; SI-NEXT: v_mov_b32_e32 v1, 0
anatofuz
parents:
diff changeset
177 ; SI-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
178 ; SI-NEXT: s_mov_b64 s[4:5], s[2:3]
anatofuz
parents:
diff changeset
179 ; SI-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
anatofuz
parents:
diff changeset
180 ; SI-NEXT: s_mov_b64 s[2:3], s[6:7]
anatofuz
parents:
diff changeset
181 ; SI-NEXT: s_waitcnt vmcnt(0)
anatofuz
parents:
diff changeset
182 ; SI-NEXT: v_cvt_pkrtz_f16_f32_e64 v2, v2, 1.0
anatofuz
parents:
diff changeset
183 ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
anatofuz
parents:
diff changeset
184 ; SI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
185 ;
anatofuz
parents:
diff changeset
186 ; VI-LABEL: v_cvt_pkrtz_v2f16_f32_reg_imm:
anatofuz
parents:
diff changeset
187 ; VI: ; %bb.0:
anatofuz
parents:
diff changeset
188 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
anatofuz
parents:
diff changeset
189 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
anatofuz
parents:
diff changeset
190 ; VI-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
191 ; VI-NEXT: v_mov_b32_e32 v1, s3
anatofuz
parents:
diff changeset
192 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2
anatofuz
parents:
diff changeset
193 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
194 ; VI-NEXT: flat_load_dword v0, v[0:1]
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
195 ; VI-NEXT: v_mov_b32_e32 v3, s1
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
196 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
197 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
150
anatofuz
parents:
diff changeset
198 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
199 ; VI-NEXT: v_cvt_pkrtz_f16_f32 v0, v0, 1.0
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
200 ; VI-NEXT: flat_store_dword v[2:3], v0
150
anatofuz
parents:
diff changeset
201 ; VI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
202 ;
anatofuz
parents:
diff changeset
203 ; GFX9-LABEL: v_cvt_pkrtz_v2f16_f32_reg_imm:
anatofuz
parents:
diff changeset
204 ; GFX9: ; %bb.0:
anatofuz
parents:
diff changeset
205 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
anatofuz
parents:
diff changeset
206 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0
anatofuz
parents:
diff changeset
207 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
208 ; GFX9-NEXT: v_mov_b32_e32 v1, s3
anatofuz
parents:
diff changeset
209 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2
anatofuz
parents:
diff changeset
210 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
211 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
212 ; GFX9-NEXT: v_mov_b32_e32 v3, s1
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
213 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
214 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
150
anatofuz
parents:
diff changeset
215 ; GFX9-NEXT: s_waitcnt vmcnt(0)
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
216 ; GFX9-NEXT: v_cvt_pkrtz_f16_f32 v0, v0, 1.0
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
217 ; GFX9-NEXT: global_store_dword v[2:3], v0, off
150
anatofuz
parents:
diff changeset
218 ; GFX9-NEXT: s_endpgm
anatofuz
parents:
diff changeset
219 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
220 %tid.ext = sext i32 %tid to i64
anatofuz
parents:
diff changeset
221 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
anatofuz
parents:
diff changeset
222 %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
anatofuz
parents:
diff changeset
223 %a = load volatile float, float addrspace(1)* %a.gep
anatofuz
parents:
diff changeset
224 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float 1.0)
anatofuz
parents:
diff changeset
225 store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
226 ret void
anatofuz
parents:
diff changeset
227 }
anatofuz
parents:
diff changeset
228
anatofuz
parents:
diff changeset
229 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_imm_reg(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
anatofuz
parents:
diff changeset
230 ; SI-LABEL: v_cvt_pkrtz_v2f16_f32_imm_reg:
anatofuz
parents:
diff changeset
231 ; SI: ; %bb.0:
anatofuz
parents:
diff changeset
232 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
anatofuz
parents:
diff changeset
233 ; SI-NEXT: s_mov_b32 s7, 0xf000
anatofuz
parents:
diff changeset
234 ; SI-NEXT: s_mov_b32 s6, 0
anatofuz
parents:
diff changeset
235 ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
anatofuz
parents:
diff changeset
236 ; SI-NEXT: v_mov_b32_e32 v1, 0
anatofuz
parents:
diff changeset
237 ; SI-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
238 ; SI-NEXT: s_mov_b64 s[4:5], s[2:3]
anatofuz
parents:
diff changeset
239 ; SI-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
anatofuz
parents:
diff changeset
240 ; SI-NEXT: s_mov_b64 s[2:3], s[6:7]
anatofuz
parents:
diff changeset
241 ; SI-NEXT: s_waitcnt vmcnt(0)
anatofuz
parents:
diff changeset
242 ; SI-NEXT: v_cvt_pkrtz_f16_f32_e32 v2, 1.0, v2
anatofuz
parents:
diff changeset
243 ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
anatofuz
parents:
diff changeset
244 ; SI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
245 ;
anatofuz
parents:
diff changeset
246 ; VI-LABEL: v_cvt_pkrtz_v2f16_f32_imm_reg:
anatofuz
parents:
diff changeset
247 ; VI: ; %bb.0:
anatofuz
parents:
diff changeset
248 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
anatofuz
parents:
diff changeset
249 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
anatofuz
parents:
diff changeset
250 ; VI-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
251 ; VI-NEXT: v_mov_b32_e32 v1, s3
anatofuz
parents:
diff changeset
252 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2
anatofuz
parents:
diff changeset
253 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
254 ; VI-NEXT: flat_load_dword v0, v[0:1]
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
255 ; VI-NEXT: v_mov_b32_e32 v3, s1
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
256 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
257 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
150
anatofuz
parents:
diff changeset
258 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
259 ; VI-NEXT: v_cvt_pkrtz_f16_f32 v0, 1.0, v0
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
260 ; VI-NEXT: flat_store_dword v[2:3], v0
150
anatofuz
parents:
diff changeset
261 ; VI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
262 ;
anatofuz
parents:
diff changeset
263 ; GFX9-LABEL: v_cvt_pkrtz_v2f16_f32_imm_reg:
anatofuz
parents:
diff changeset
264 ; GFX9: ; %bb.0:
anatofuz
parents:
diff changeset
265 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
anatofuz
parents:
diff changeset
266 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0
anatofuz
parents:
diff changeset
267 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
268 ; GFX9-NEXT: v_mov_b32_e32 v1, s3
anatofuz
parents:
diff changeset
269 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2
anatofuz
parents:
diff changeset
270 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
271 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
272 ; GFX9-NEXT: v_mov_b32_e32 v3, s1
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
273 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
274 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
150
anatofuz
parents:
diff changeset
275 ; GFX9-NEXT: s_waitcnt vmcnt(0)
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
276 ; GFX9-NEXT: v_cvt_pkrtz_f16_f32 v0, 1.0, v0
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
277 ; GFX9-NEXT: global_store_dword v[2:3], v0, off
150
anatofuz
parents:
diff changeset
278 ; GFX9-NEXT: s_endpgm
anatofuz
parents:
diff changeset
279 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
280 %tid.ext = sext i32 %tid to i64
anatofuz
parents:
diff changeset
281 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
anatofuz
parents:
diff changeset
282 %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
anatofuz
parents:
diff changeset
283 %a = load volatile float, float addrspace(1)* %a.gep
anatofuz
parents:
diff changeset
284 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 1.0, float %a)
anatofuz
parents:
diff changeset
285 store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
286 ret void
anatofuz
parents:
diff changeset
287 }
anatofuz
parents:
diff changeset
288
anatofuz
parents:
diff changeset
289 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
anatofuz
parents:
diff changeset
290 ; SI-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_lo:
anatofuz
parents:
diff changeset
291 ; SI: ; %bb.0:
anatofuz
parents:
diff changeset
292 ; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
anatofuz
parents:
diff changeset
293 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
anatofuz
parents:
diff changeset
294 ; SI-NEXT: s_mov_b32 s3, 0xf000
anatofuz
parents:
diff changeset
295 ; SI-NEXT: s_mov_b32 s2, 0
anatofuz
parents:
diff changeset
296 ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
anatofuz
parents:
diff changeset
297 ; SI-NEXT: v_mov_b32_e32 v1, 0
anatofuz
parents:
diff changeset
298 ; SI-NEXT: s_mov_b64 s[6:7], s[2:3]
anatofuz
parents:
diff changeset
299 ; SI-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
300 ; SI-NEXT: s_mov_b64 s[0:1], s[10:11]
anatofuz
parents:
diff changeset
301 ; SI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
anatofuz
parents:
diff changeset
302 ; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
anatofuz
parents:
diff changeset
303 ; SI-NEXT: s_mov_b64 s[10:11], s[2:3]
anatofuz
parents:
diff changeset
304 ; SI-NEXT: s_waitcnt vmcnt(0)
anatofuz
parents:
diff changeset
305 ; SI-NEXT: v_cvt_pkrtz_f16_f32_e64 v2, -v2, v3
anatofuz
parents:
diff changeset
306 ; SI-NEXT: buffer_store_dword v2, v[0:1], s[8:11], 0 addr64
anatofuz
parents:
diff changeset
307 ; SI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
308 ;
anatofuz
parents:
diff changeset
309 ; VI-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_lo:
anatofuz
parents:
diff changeset
310 ; VI: ; %bb.0:
anatofuz
parents:
diff changeset
311 ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
anatofuz
parents:
diff changeset
312 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
anatofuz
parents:
diff changeset
313 ; VI-NEXT: v_lshlrev_b32_e32 v4, 2, v0
anatofuz
parents:
diff changeset
314 ; VI-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
315 ; VI-NEXT: v_mov_b32_e32 v1, s7
anatofuz
parents:
diff changeset
316 ; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v4
anatofuz
parents:
diff changeset
317 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
anatofuz
parents:
diff changeset
318 ; VI-NEXT: v_mov_b32_e32 v3, s1
anatofuz
parents:
diff changeset
319 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v4
anatofuz
parents:
diff changeset
320 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
321 ; VI-NEXT: flat_load_dword v0, v[0:1]
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
322 ; VI-NEXT: flat_load_dword v1, v[2:3]
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
323 ; VI-NEXT: v_mov_b32_e32 v5, s5
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
324 ; VI-NEXT: v_add_u32_e32 v4, vcc, s4, v4
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
325 ; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
150
anatofuz
parents:
diff changeset
326 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
327 ; VI-NEXT: v_cvt_pkrtz_f16_f32 v0, -v0, v1
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
328 ; VI-NEXT: flat_store_dword v[4:5], v0
150
anatofuz
parents:
diff changeset
329 ; VI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
330 ;
anatofuz
parents:
diff changeset
331 ; GFX9-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_lo:
anatofuz
parents:
diff changeset
332 ; GFX9: ; %bb.0:
anatofuz
parents:
diff changeset
333 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
anatofuz
parents:
diff changeset
334 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
anatofuz
parents:
diff changeset
335 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v0
anatofuz
parents:
diff changeset
336 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
337 ; GFX9-NEXT: v_mov_b32_e32 v1, s7
anatofuz
parents:
diff changeset
338 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s6, v4
anatofuz
parents:
diff changeset
339 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
anatofuz
parents:
diff changeset
340 ; GFX9-NEXT: v_mov_b32_e32 v3, s1
anatofuz
parents:
diff changeset
341 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v4
anatofuz
parents:
diff changeset
342 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
343 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
344 ; GFX9-NEXT: global_load_dword v1, v[2:3], off
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
345 ; GFX9-NEXT: v_mov_b32_e32 v5, s5
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
346 ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, s4, v4
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
347 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
150
anatofuz
parents:
diff changeset
348 ; GFX9-NEXT: s_waitcnt vmcnt(0)
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
349 ; GFX9-NEXT: v_cvt_pkrtz_f16_f32 v0, -v0, v1
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
350 ; GFX9-NEXT: global_store_dword v[4:5], v0, off
150
anatofuz
parents:
diff changeset
351 ; GFX9-NEXT: s_endpgm
anatofuz
parents:
diff changeset
352 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
353 %tid.ext = sext i32 %tid to i64
anatofuz
parents:
diff changeset
354 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
anatofuz
parents:
diff changeset
355 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
anatofuz
parents:
diff changeset
356 %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
anatofuz
parents:
diff changeset
357 %a = load volatile float, float addrspace(1)* %a.gep
anatofuz
parents:
diff changeset
358 %b = load volatile float, float addrspace(1)* %b.gep
anatofuz
parents:
diff changeset
359 %neg.a = fsub float -0.0, %a
anatofuz
parents:
diff changeset
360 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %neg.a, float %b)
anatofuz
parents:
diff changeset
361 store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
362 ret void
anatofuz
parents:
diff changeset
363 }
anatofuz
parents:
diff changeset
364
anatofuz
parents:
diff changeset
365 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
anatofuz
parents:
diff changeset
366 ; SI-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_hi:
anatofuz
parents:
diff changeset
367 ; SI: ; %bb.0:
anatofuz
parents:
diff changeset
368 ; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
anatofuz
parents:
diff changeset
369 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
anatofuz
parents:
diff changeset
370 ; SI-NEXT: s_mov_b32 s3, 0xf000
anatofuz
parents:
diff changeset
371 ; SI-NEXT: s_mov_b32 s2, 0
anatofuz
parents:
diff changeset
372 ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
anatofuz
parents:
diff changeset
373 ; SI-NEXT: v_mov_b32_e32 v1, 0
anatofuz
parents:
diff changeset
374 ; SI-NEXT: s_mov_b64 s[6:7], s[2:3]
anatofuz
parents:
diff changeset
375 ; SI-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
376 ; SI-NEXT: s_mov_b64 s[0:1], s[10:11]
anatofuz
parents:
diff changeset
377 ; SI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
anatofuz
parents:
diff changeset
378 ; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
anatofuz
parents:
diff changeset
379 ; SI-NEXT: s_mov_b64 s[10:11], s[2:3]
anatofuz
parents:
diff changeset
380 ; SI-NEXT: s_waitcnt vmcnt(0)
anatofuz
parents:
diff changeset
381 ; SI-NEXT: v_cvt_pkrtz_f16_f32_e64 v2, v2, -v3
anatofuz
parents:
diff changeset
382 ; SI-NEXT: buffer_store_dword v2, v[0:1], s[8:11], 0 addr64
anatofuz
parents:
diff changeset
383 ; SI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
384 ;
anatofuz
parents:
diff changeset
385 ; VI-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_hi:
anatofuz
parents:
diff changeset
386 ; VI: ; %bb.0:
anatofuz
parents:
diff changeset
387 ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
anatofuz
parents:
diff changeset
388 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
anatofuz
parents:
diff changeset
389 ; VI-NEXT: v_lshlrev_b32_e32 v4, 2, v0
anatofuz
parents:
diff changeset
390 ; VI-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
391 ; VI-NEXT: v_mov_b32_e32 v1, s7
anatofuz
parents:
diff changeset
392 ; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v4
anatofuz
parents:
diff changeset
393 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
anatofuz
parents:
diff changeset
394 ; VI-NEXT: v_mov_b32_e32 v3, s1
anatofuz
parents:
diff changeset
395 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v4
anatofuz
parents:
diff changeset
396 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
397 ; VI-NEXT: flat_load_dword v0, v[0:1]
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
398 ; VI-NEXT: flat_load_dword v1, v[2:3]
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
399 ; VI-NEXT: v_mov_b32_e32 v5, s5
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
400 ; VI-NEXT: v_add_u32_e32 v4, vcc, s4, v4
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
401 ; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
150
anatofuz
parents:
diff changeset
402 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
403 ; VI-NEXT: v_cvt_pkrtz_f16_f32 v0, v0, -v1
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
404 ; VI-NEXT: flat_store_dword v[4:5], v0
150
anatofuz
parents:
diff changeset
405 ; VI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
406 ;
anatofuz
parents:
diff changeset
407 ; GFX9-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_hi:
anatofuz
parents:
diff changeset
408 ; GFX9: ; %bb.0:
anatofuz
parents:
diff changeset
409 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
anatofuz
parents:
diff changeset
410 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
anatofuz
parents:
diff changeset
411 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v0
anatofuz
parents:
diff changeset
412 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
413 ; GFX9-NEXT: v_mov_b32_e32 v1, s7
anatofuz
parents:
diff changeset
414 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s6, v4
anatofuz
parents:
diff changeset
415 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
anatofuz
parents:
diff changeset
416 ; GFX9-NEXT: v_mov_b32_e32 v3, s1
anatofuz
parents:
diff changeset
417 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v4
anatofuz
parents:
diff changeset
418 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
419 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
420 ; GFX9-NEXT: global_load_dword v1, v[2:3], off
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
421 ; GFX9-NEXT: v_mov_b32_e32 v5, s5
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
422 ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, s4, v4
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
423 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
150
anatofuz
parents:
diff changeset
424 ; GFX9-NEXT: s_waitcnt vmcnt(0)
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
425 ; GFX9-NEXT: v_cvt_pkrtz_f16_f32 v0, v0, -v1
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
426 ; GFX9-NEXT: global_store_dword v[4:5], v0, off
150
anatofuz
parents:
diff changeset
427 ; GFX9-NEXT: s_endpgm
anatofuz
parents:
diff changeset
428 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
429 %tid.ext = sext i32 %tid to i64
anatofuz
parents:
diff changeset
430 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
anatofuz
parents:
diff changeset
431 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
anatofuz
parents:
diff changeset
432 %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
anatofuz
parents:
diff changeset
433 %a = load volatile float, float addrspace(1)* %a.gep
anatofuz
parents:
diff changeset
434 %b = load volatile float, float addrspace(1)* %b.gep
anatofuz
parents:
diff changeset
435 %neg.b = fsub float -0.0, %b
anatofuz
parents:
diff changeset
436 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float %neg.b)
anatofuz
parents:
diff changeset
437 store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
438 ret void
anatofuz
parents:
diff changeset
439 }
anatofuz
parents:
diff changeset
440
anatofuz
parents:
diff changeset
441 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
anatofuz
parents:
diff changeset
442 ; SI-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_lo_hi:
anatofuz
parents:
diff changeset
443 ; SI: ; %bb.0:
anatofuz
parents:
diff changeset
444 ; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
anatofuz
parents:
diff changeset
445 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
anatofuz
parents:
diff changeset
446 ; SI-NEXT: s_mov_b32 s3, 0xf000
anatofuz
parents:
diff changeset
447 ; SI-NEXT: s_mov_b32 s2, 0
anatofuz
parents:
diff changeset
448 ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
anatofuz
parents:
diff changeset
449 ; SI-NEXT: v_mov_b32_e32 v1, 0
anatofuz
parents:
diff changeset
450 ; SI-NEXT: s_mov_b64 s[6:7], s[2:3]
anatofuz
parents:
diff changeset
451 ; SI-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
452 ; SI-NEXT: s_mov_b64 s[0:1], s[10:11]
anatofuz
parents:
diff changeset
453 ; SI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
anatofuz
parents:
diff changeset
454 ; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
anatofuz
parents:
diff changeset
455 ; SI-NEXT: s_mov_b64 s[10:11], s[2:3]
anatofuz
parents:
diff changeset
456 ; SI-NEXT: s_waitcnt vmcnt(0)
anatofuz
parents:
diff changeset
457 ; SI-NEXT: v_cvt_pkrtz_f16_f32_e64 v2, -v2, -v3
anatofuz
parents:
diff changeset
458 ; SI-NEXT: buffer_store_dword v2, v[0:1], s[8:11], 0 addr64
anatofuz
parents:
diff changeset
459 ; SI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
460 ;
anatofuz
parents:
diff changeset
461 ; VI-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_lo_hi:
anatofuz
parents:
diff changeset
462 ; VI: ; %bb.0:
anatofuz
parents:
diff changeset
463 ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
anatofuz
parents:
diff changeset
464 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
anatofuz
parents:
diff changeset
465 ; VI-NEXT: v_lshlrev_b32_e32 v4, 2, v0
anatofuz
parents:
diff changeset
466 ; VI-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
467 ; VI-NEXT: v_mov_b32_e32 v1, s7
anatofuz
parents:
diff changeset
468 ; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v4
anatofuz
parents:
diff changeset
469 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
anatofuz
parents:
diff changeset
470 ; VI-NEXT: v_mov_b32_e32 v3, s1
anatofuz
parents:
diff changeset
471 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v4
anatofuz
parents:
diff changeset
472 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
473 ; VI-NEXT: flat_load_dword v0, v[0:1]
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
474 ; VI-NEXT: flat_load_dword v1, v[2:3]
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
475 ; VI-NEXT: v_mov_b32_e32 v5, s5
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
476 ; VI-NEXT: v_add_u32_e32 v4, vcc, s4, v4
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
477 ; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
150
anatofuz
parents:
diff changeset
478 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
479 ; VI-NEXT: v_cvt_pkrtz_f16_f32 v0, -v0, -v1
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
480 ; VI-NEXT: flat_store_dword v[4:5], v0
150
anatofuz
parents:
diff changeset
481 ; VI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
482 ;
anatofuz
parents:
diff changeset
483 ; GFX9-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_lo_hi:
anatofuz
parents:
diff changeset
484 ; GFX9: ; %bb.0:
anatofuz
parents:
diff changeset
485 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
anatofuz
parents:
diff changeset
486 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
anatofuz
parents:
diff changeset
487 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v0
anatofuz
parents:
diff changeset
488 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
489 ; GFX9-NEXT: v_mov_b32_e32 v1, s7
anatofuz
parents:
diff changeset
490 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s6, v4
anatofuz
parents:
diff changeset
491 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
anatofuz
parents:
diff changeset
492 ; GFX9-NEXT: v_mov_b32_e32 v3, s1
anatofuz
parents:
diff changeset
493 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v4
anatofuz
parents:
diff changeset
494 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
495 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
496 ; GFX9-NEXT: global_load_dword v1, v[2:3], off
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
497 ; GFX9-NEXT: v_mov_b32_e32 v5, s5
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
498 ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, s4, v4
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
499 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
150
anatofuz
parents:
diff changeset
500 ; GFX9-NEXT: s_waitcnt vmcnt(0)
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
501 ; GFX9-NEXT: v_cvt_pkrtz_f16_f32 v0, -v0, -v1
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
502 ; GFX9-NEXT: global_store_dword v[4:5], v0, off
150
anatofuz
parents:
diff changeset
503 ; GFX9-NEXT: s_endpgm
anatofuz
parents:
diff changeset
504 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
505 %tid.ext = sext i32 %tid to i64
anatofuz
parents:
diff changeset
506 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
anatofuz
parents:
diff changeset
507 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
anatofuz
parents:
diff changeset
508 %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
anatofuz
parents:
diff changeset
509 %a = load volatile float, float addrspace(1)* %a.gep
anatofuz
parents:
diff changeset
510 %b = load volatile float, float addrspace(1)* %b.gep
anatofuz
parents:
diff changeset
511 %neg.a = fsub float -0.0, %a
anatofuz
parents:
diff changeset
512 %neg.b = fsub float -0.0, %b
anatofuz
parents:
diff changeset
513 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %neg.a, float %neg.b)
anatofuz
parents:
diff changeset
514 store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
515 ret void
anatofuz
parents:
diff changeset
516 }
anatofuz
parents:
diff changeset
517
anatofuz
parents:
diff changeset
518 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
anatofuz
parents:
diff changeset
519 ; SI-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi:
anatofuz
parents:
diff changeset
520 ; SI: ; %bb.0:
anatofuz
parents:
diff changeset
521 ; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
anatofuz
parents:
diff changeset
522 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
anatofuz
parents:
diff changeset
523 ; SI-NEXT: s_mov_b32 s3, 0xf000
anatofuz
parents:
diff changeset
524 ; SI-NEXT: s_mov_b32 s2, 0
anatofuz
parents:
diff changeset
525 ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
anatofuz
parents:
diff changeset
526 ; SI-NEXT: v_mov_b32_e32 v1, 0
anatofuz
parents:
diff changeset
527 ; SI-NEXT: s_mov_b64 s[6:7], s[2:3]
anatofuz
parents:
diff changeset
528 ; SI-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
529 ; SI-NEXT: s_mov_b64 s[0:1], s[10:11]
anatofuz
parents:
diff changeset
530 ; SI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
anatofuz
parents:
diff changeset
531 ; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
anatofuz
parents:
diff changeset
532 ; SI-NEXT: s_mov_b64 s[10:11], s[2:3]
anatofuz
parents:
diff changeset
533 ; SI-NEXT: s_waitcnt vmcnt(0)
anatofuz
parents:
diff changeset
534 ; SI-NEXT: v_cvt_pkrtz_f16_f32_e64 v2, -|v2|, -v3
anatofuz
parents:
diff changeset
535 ; SI-NEXT: buffer_store_dword v2, v[0:1], s[8:11], 0 addr64
anatofuz
parents:
diff changeset
536 ; SI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
537 ;
anatofuz
parents:
diff changeset
538 ; VI-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi:
anatofuz
parents:
diff changeset
539 ; VI: ; %bb.0:
anatofuz
parents:
diff changeset
540 ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
anatofuz
parents:
diff changeset
541 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
anatofuz
parents:
diff changeset
542 ; VI-NEXT: v_lshlrev_b32_e32 v4, 2, v0
anatofuz
parents:
diff changeset
543 ; VI-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
544 ; VI-NEXT: v_mov_b32_e32 v1, s7
anatofuz
parents:
diff changeset
545 ; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v4
anatofuz
parents:
diff changeset
546 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
anatofuz
parents:
diff changeset
547 ; VI-NEXT: v_mov_b32_e32 v3, s1
anatofuz
parents:
diff changeset
548 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v4
anatofuz
parents:
diff changeset
549 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
550 ; VI-NEXT: flat_load_dword v0, v[0:1]
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
551 ; VI-NEXT: flat_load_dword v1, v[2:3]
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
552 ; VI-NEXT: v_mov_b32_e32 v5, s5
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
553 ; VI-NEXT: v_add_u32_e32 v4, vcc, s4, v4
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
554 ; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
150
anatofuz
parents:
diff changeset
555 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
556 ; VI-NEXT: v_cvt_pkrtz_f16_f32 v0, -|v0|, -v1
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
557 ; VI-NEXT: flat_store_dword v[4:5], v0
150
anatofuz
parents:
diff changeset
558 ; VI-NEXT: s_endpgm
anatofuz
parents:
diff changeset
559 ;
anatofuz
parents:
diff changeset
560 ; GFX9-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi:
anatofuz
parents:
diff changeset
561 ; GFX9: ; %bb.0:
anatofuz
parents:
diff changeset
562 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
anatofuz
parents:
diff changeset
563 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
anatofuz
parents:
diff changeset
564 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v0
anatofuz
parents:
diff changeset
565 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
anatofuz
parents:
diff changeset
566 ; GFX9-NEXT: v_mov_b32_e32 v1, s7
anatofuz
parents:
diff changeset
567 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s6, v4
anatofuz
parents:
diff changeset
568 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
anatofuz
parents:
diff changeset
569 ; GFX9-NEXT: v_mov_b32_e32 v3, s1
anatofuz
parents:
diff changeset
570 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v4
anatofuz
parents:
diff changeset
571 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
572 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
573 ; GFX9-NEXT: global_load_dword v1, v[2:3], off
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
574 ; GFX9-NEXT: v_mov_b32_e32 v5, s5
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
575 ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, s4, v4
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
576 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
150
anatofuz
parents:
diff changeset
577 ; GFX9-NEXT: s_waitcnt vmcnt(0)
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
578 ; GFX9-NEXT: v_cvt_pkrtz_f16_f32 v0, -|v0|, -v1
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
579 ; GFX9-NEXT: global_store_dword v[4:5], v0, off
150
anatofuz
parents:
diff changeset
580 ; GFX9-NEXT: s_endpgm
anatofuz
parents:
diff changeset
581 %tid = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
582 %tid.ext = sext i32 %tid to i64
anatofuz
parents:
diff changeset
583 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
anatofuz
parents:
diff changeset
584 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
anatofuz
parents:
diff changeset
585 %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
anatofuz
parents:
diff changeset
586 %a = load volatile float, float addrspace(1)* %a.gep
anatofuz
parents:
diff changeset
587 %b = load volatile float, float addrspace(1)* %b.gep
anatofuz
parents:
diff changeset
588 %fabs.a = call float @llvm.fabs.f32(float %a)
anatofuz
parents:
diff changeset
589 %neg.fabs.a = fsub float -0.0, %fabs.a
anatofuz
parents:
diff changeset
590 %neg.b = fsub float -0.0, %b
anatofuz
parents:
diff changeset
591 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %neg.fabs.a, float %neg.b)
anatofuz
parents:
diff changeset
592 store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
593 ret void
anatofuz
parents:
diff changeset
594 }
anatofuz
parents:
diff changeset
595
anatofuz
parents:
diff changeset
596 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
anatofuz
parents:
diff changeset
597 declare float @llvm.fabs.f32(float) #1
anatofuz
parents:
diff changeset
598 declare i32 @llvm.amdgcn.workitem.id.x() #1
anatofuz
parents:
diff changeset
599
anatofuz
parents:
diff changeset
600
anatofuz
parents:
diff changeset
601 attributes #0 = { nounwind }
anatofuz
parents:
diff changeset
602 attributes #1 = { nounwind readnone }