annotate test/CodeGen/AMDGPU/half.ll @ 128:c347d3398279 default tip

fix
author mir3636
date Wed, 06 Dec 2017 14:37:17 +0900
parents 803732b1fca8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
1 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
4 ; half args should be promoted to float for SI and lower.
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
5
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
6 ; GCN-LABEL: {{^}}load_f16_arg:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 ; GCN: s_load_dword [[ARG:s[0-9]+]]
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
8 ; SI: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[ARG]]
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
9 ; VI: v_trunc_f16_e32 [[CVT:v[0-9]+]], [[ARG]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 ; GCN: buffer_store_short [[CVT]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
11 define amdgpu_kernel void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
12 store half %arg, half addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
13 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
15
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
16 ; GCN-LABEL: {{^}}load_v2f16_arg:
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
17 ; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
18 ; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
19 ; GCN: v_lshlrev_b32_e32 [[HI:v[0-9]+]], 16, [[V1]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
20 ; GCN: v_or_b32_e32 [[PACKED:v[0-9]+]], [[V0]], [[HI]]
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
21 ; GCN: buffer_store_dword [[PACKED]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
22 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
23 define amdgpu_kernel void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
24 store <2 x half> %arg, <2 x half> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
25 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
26 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
27
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
28 ; GCN-LABEL: {{^}}load_v3f16_arg:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
29 ; GCN: buffer_load_ushort
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
30 ; GCN: buffer_load_ushort
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
31 ; GCN: buffer_load_ushort
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
32 ; GCN-NOT: buffer_load
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
33 ; GCN-DAG: buffer_store_dword
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
34 ; GCN-DAG: buffer_store_short
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
35 ; GCN-NOT: buffer_store
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
36 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
37 define amdgpu_kernel void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x half> %arg) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
38 store <3 x half> %arg, <3 x half> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
39 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
40 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
41
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
42 ; GCN-LABEL: {{^}}load_v4f16_arg:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
43 ; GCN: buffer_load_ushort
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
44 ; GCN: buffer_load_ushort
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
45 ; GCN: buffer_load_ushort
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
46 ; GCN: buffer_load_ushort
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
47 ; GCN: buffer_store_dwordx2
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
48 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
49 define amdgpu_kernel void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
50 store <4 x half> %arg, <4 x half> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
51 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
52 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
53
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 ; GCN-LABEL: {{^}}load_v8f16_arg:
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
55 define amdgpu_kernel void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x half> %arg) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
56 store <8 x half> %arg, <8 x half> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
57 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
58 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
59
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
60 ; GCN-LABEL: {{^}}extload_v2f16_arg:
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
61 define amdgpu_kernel void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2 x half> %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
62 %fpext = fpext <2 x half> %in to <2 x float>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
63 store <2 x float> %fpext, <2 x float> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
64 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
65 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
66
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
67 ; GCN-LABEL: {{^}}extload_f16_to_f32_arg:
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
68 define amdgpu_kernel void @extload_f16_to_f32_arg(float addrspace(1)* %out, half %arg) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
69 %ext = fpext half %arg to float
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
70 store float %ext, float addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
71 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
72 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
73
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
74 ; GCN-LABEL: {{^}}extload_v2f16_to_v2f32_arg:
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
75 define amdgpu_kernel void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)* %out, <2 x half> %arg) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
76 %ext = fpext <2 x half> %arg to <2 x float>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
77 store <2 x float> %ext, <2 x float> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
78 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
79 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
80
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
81 ; GCN-LABEL: {{^}}extload_v3f16_to_v3f32_arg:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
82 ; GCN: buffer_load_ushort
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
83 ; GCN: buffer_load_ushort
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
84 ; GCN: buffer_load_ushort
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
85 ; GCN-NOT: buffer_load
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
86 ; GCN: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
87 ; GCN: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
88 ; GCN: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
89 ; GCN-NOT: v_cvt_f32_f16
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
90 ; GCN-DAG: buffer_store_dword
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
91 ; GCN-DAG: buffer_store_dwordx2
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
92 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
93 define amdgpu_kernel void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
94 %ext = fpext <3 x half> %arg to <3 x float>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
95 store <3 x float> %ext, <3 x float> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
96 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
97 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
98
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
99 ; GCN-LABEL: {{^}}extload_v4f16_to_v4f32_arg:
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
100 define amdgpu_kernel void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x half> %arg) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
101 %ext = fpext <4 x half> %arg to <4 x float>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
102 store <4 x float> %ext, <4 x float> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
103 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
104 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
105
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
106 ; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg:
100
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
107 ; GCN: buffer_load_ushort
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
108 ; GCN: buffer_load_ushort
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
109 ; GCN: buffer_load_ushort
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
110 ; GCN: buffer_load_ushort
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
111 ; GCN: buffer_load_ushort
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
112 ; GCN: buffer_load_ushort
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
113 ; GCN: buffer_load_ushort
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
114 ; GCN: buffer_load_ushort
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
115
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
116 ; GCN: v_cvt_f32_f16_e32
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
117 ; GCN: v_cvt_f32_f16_e32
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
118 ; GCN: v_cvt_f32_f16_e32
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
119 ; GCN: v_cvt_f32_f16_e32
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
120 ; GCN: v_cvt_f32_f16_e32
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
121 ; GCN: v_cvt_f32_f16_e32
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
122 ; GCN: v_cvt_f32_f16_e32
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
123 ; GCN: v_cvt_f32_f16_e32
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
124
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
125 ; GCN: buffer_store_dwordx4
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
126 ; GCN: buffer_store_dwordx4
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
127 define amdgpu_kernel void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
128 %ext = fpext <8 x half> %arg to <8 x float>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
129 store <8 x float> %ext, <8 x float> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
130 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
131 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
132
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
133 ; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
134 ; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
135 ; SI: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
136 ; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}}
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
137 ; VI: v_trunc_f16_e32 v[[VARG:[0-9]+]], [[ARG]]
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
138 ; VI: v_cvt_f32_f16_e32 v[[VARG_F32:[0-9]+]], v[[VARG]]
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
139 ; VI: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], v[[VARG_F32]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
140 ; GCN: buffer_store_dwordx2 [[RESULT]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
141 define amdgpu_kernel void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
142 %ext = fpext half %arg to double
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
143 store double %ext, double addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
144 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
145 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
146
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
147 ; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
148 ; GCN-DAG: buffer_load_ushort v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
149 ; GCN-DAG: buffer_load_ushort v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
150 ; GCN-DAG: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
151 ; GCN-DAG: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
152 ; GCN-DAG: v_cvt_f64_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
153 ; GCN-DAG: v_cvt_f64_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
154 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
155 define amdgpu_kernel void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
156 %ext = fpext <2 x half> %arg to <2 x double>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
157 store <2 x double> %ext, <2 x double> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
158 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
159 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
160
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
161 ; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
162 ; GCN-DAG: buffer_load_ushort v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
163 ; GCN-DAG: buffer_load_ushort v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
164 ; GCN-DAG: buffer_load_ushort v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
165 ; GCN-DAG: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
166 ; GCN-DAG: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
167 ; GCN-DAG: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
168 ; GCN-DAG: v_cvt_f64_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
169 ; GCN-DAG: v_cvt_f64_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
170 ; GCN-DAG: v_cvt_f64_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
171 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
172 define amdgpu_kernel void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
173 %ext = fpext <3 x half> %arg to <3 x double>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
174 store <3 x double> %ext, <3 x double> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
175 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
176 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
177
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
178 ; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
179 ; GCN-DAG: buffer_load_ushort v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
180 ; GCN-DAG: buffer_load_ushort v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
181 ; GCN-DAG: buffer_load_ushort v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
182 ; GCN-DAG: buffer_load_ushort v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
183 ; GCN-DAG: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
184 ; GCN-DAG: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
185 ; GCN-DAG: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
186 ; GCN-DAG: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
187 ; GCN-DAG: v_cvt_f64_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
188 ; GCN-DAG: v_cvt_f64_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
189 ; GCN-DAG: v_cvt_f64_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
190 ; GCN-DAG: v_cvt_f64_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
191 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
192 define amdgpu_kernel void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
193 %ext = fpext <4 x half> %arg to <4 x double>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
194 store <4 x double> %ext, <4 x double> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
195 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
196 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
197
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
198 ; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
199 ; GCN-DAG: buffer_load_ushort v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
200 ; GCN-DAG: buffer_load_ushort v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
201 ; GCN-DAG: buffer_load_ushort v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
202 ; GCN-DAG: buffer_load_ushort v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
203
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
204 ; GCN-DAG: buffer_load_ushort v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
205 ; GCN-DAG: buffer_load_ushort v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
206 ; GCN-DAG: buffer_load_ushort v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
207 ; GCN-DAG: buffer_load_ushort v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
208
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
209 ; GCN-DAG: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
210 ; GCN-DAG: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
211 ; GCN-DAG: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
212 ; GCN-DAG: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
213
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
214 ; GCN-DAG: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
215 ; GCN-DAG: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
216 ; GCN-DAG: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
217 ; GCN-DAG: v_cvt_f32_f16_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
218
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
219 ; GCN-DAG: v_cvt_f64_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
220 ; GCN-DAG: v_cvt_f64_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
221 ; GCN-DAG: v_cvt_f64_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
222 ; GCN-DAG: v_cvt_f64_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
223
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
224 ; GCN-DAG: v_cvt_f64_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
225 ; GCN-DAG: v_cvt_f64_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
226 ; GCN-DAG: v_cvt_f64_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
227 ; GCN-DAG: v_cvt_f64_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
228
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
229 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
230 define amdgpu_kernel void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
231 %ext = fpext <8 x half> %arg to <8 x double>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
232 store <8 x double> %ext, <8 x double> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
233 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
234 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
235
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
236 ; GCN-LABEL: {{^}}global_load_store_f16:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
237 ; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
238 ; GCN: buffer_store_short [[TMP]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
239 define amdgpu_kernel void @global_load_store_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
240 %val = load half, half addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
241 store half %val, half addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
242 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
243 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
244
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
245 ; GCN-LABEL: {{^}}global_load_store_v2f16:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
246 ; GCN: buffer_load_dword [[TMP:v[0-9]+]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
247 ; GCN: buffer_store_dword [[TMP]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
248 define amdgpu_kernel void @global_load_store_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
249 %val = load <2 x half>, <2 x half> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
250 store <2 x half> %val, <2 x half> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
251 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
252 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
253
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
254 ; GCN-LABEL: {{^}}global_load_store_v4f16:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
255 ; GCN: buffer_load_dwordx2 [[TMP:v\[[0-9]+:[0-9]+\]]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
256 ; GCN: buffer_store_dwordx2 [[TMP]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
257 define amdgpu_kernel void @global_load_store_v4f16(<4 x half> addrspace(1)* %in, <4 x half> addrspace(1)* %out) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
258 %val = load <4 x half>, <4 x half> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
259 store <4 x half> %val, <4 x half> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
260 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
261 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
262
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
263 ; GCN-LABEL: {{^}}global_load_store_v8f16:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
264 ; GCN: buffer_load_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
265 ; GCN: buffer_store_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
266 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
267 define amdgpu_kernel void @global_load_store_v8f16(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
268 %val = load <8 x half>, <8 x half> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
269 store <8 x half> %val, <8 x half> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
270 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
271 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
272
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
273 ; GCN-LABEL: {{^}}global_extload_f16_to_f32:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
274 ; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
275 ; GCN: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[LOAD]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
276 ; GCN: buffer_store_dword [[CVT]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
277 define amdgpu_kernel void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
278 %val = load half, half addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
279 %cvt = fpext half %val to float
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
280 store float %cvt, float addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
281 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
282 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
283
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
284 ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
285 ; GCN: buffer_load_dword [[LOAD:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
286 ; GCN: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
287 ; SI: v_lshrrev_b32_e32 [[HI:v[0-9]+]], 16, [[LOAD]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
288 ; SI: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[HI]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
289 ; VI: v_cvt_f32_f16_sdwa v[[CVT1:[0-9]+]], [[LOAD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
290 ; GCN: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
291 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
292 define amdgpu_kernel void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
293 %val = load <2 x half>, <2 x half> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
294 %cvt = fpext <2 x half> %val to <2 x float>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
295 store <2 x float> %cvt, <2 x float> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
296 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
297 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
298
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
299 ; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f32:
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
300 define amdgpu_kernel void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
301 %val = load <3 x half>, <3 x half> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
302 %cvt = fpext <3 x half> %val to <3 x float>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
303 store <3 x float> %cvt, <3 x float> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
304 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
305 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
306
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
307 ; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f32:
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
308 define amdgpu_kernel void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
309 %val = load <4 x half>, <4 x half> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
310 %cvt = fpext <4 x half> %val to <4 x float>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
311 store <4 x float> %cvt, <4 x float> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
312 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
313 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
314
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
315 ; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f32:
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
316 define amdgpu_kernel void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
317 %val = load <8 x half>, <8 x half> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
318 %cvt = fpext <8 x half> %val to <8 x float>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
319 store <8 x float> %cvt, <8 x float> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
320 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
321 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
322
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
323 ; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32:
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
324 ; GCN: buffer_load_dwordx4
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
325 ; GCN: buffer_load_dwordx4
100
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
326
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
327 ; SI: v_cvt_f32_f16_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
328 ; SI: v_cvt_f32_f16_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
329 ; SI: v_cvt_f32_f16_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
330 ; SI: v_cvt_f32_f16_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
331 ; SI: v_cvt_f32_f16_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
332 ; SI: v_cvt_f32_f16_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
333 ; SI: v_cvt_f32_f16_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
334 ; SI: v_cvt_f32_f16_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
335 ; SI: v_cvt_f32_f16_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
336 ; SI: v_cvt_f32_f16_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
337 ; SI: v_cvt_f32_f16_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
338 ; SI: v_cvt_f32_f16_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
339 ; SI: v_cvt_f32_f16_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
340 ; SI: v_cvt_f32_f16_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
341 ; SI: v_cvt_f32_f16_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
342 ; SI: v_cvt_f32_f16_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
343
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
344 ; VI: v_cvt_f32_f16_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
345 ; VI: v_cvt_f32_f16_sdwa
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
346 ; ...
100
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
347
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
348 ; GCN: buffer_store_dwordx4
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
349 ; GCN: buffer_store_dwordx4
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
350 ; GCN: buffer_store_dwordx4
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
351 ; GCN: buffer_store_dwordx4
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
352
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
353 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
354 define amdgpu_kernel void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
355 %val = load <16 x half>, <16 x half> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
356 %cvt = fpext <16 x half> %val to <16 x float>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
357 store <16 x float> %cvt, <16 x float> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
358 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
359 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
360
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
361 ; GCN-LABEL: {{^}}global_extload_f16_to_f64:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
362 ; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
363 ; GCN: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[LOAD]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
364 ; GCN: v_cvt_f64_f32_e32 [[CVT1:v\[[0-9]+:[0-9]+\]]], [[CVT0]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
365 ; GCN: buffer_store_dwordx2 [[CVT1]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
366 define amdgpu_kernel void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
367 %val = load half, half addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
368 %cvt = fpext half %val to double
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
369 store double %cvt, double addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
370 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
371 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
372
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
373 ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
374 ; GCN-DAG: buffer_load_dword [[LOAD:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
375
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
376 ; SI-DAG: v_lshrrev_b32_e32 [[HI:v[0-9]+]], 16, [[LOAD]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
377 ; SI-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
378 ; SI-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[HI]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
379 ; SI-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT0]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
380 ; SI-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT3_LO:[0-9]+]]:[[CVT3_HI:[0-9]+]]{{\]}}, v[[CVT1]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
381
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
382 ; VI-DAG: v_cvt_f32_f16_sdwa v[[CVT0:[0-9]+]], [[LOAD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
383 ; VI-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
384 ; VI-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT3_LO:[0-9]+]]:[[CVT3_HI:[0-9]+]]{{\]}}, v[[CVT0]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
385 ; VI-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT1]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
386
100
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
387 ; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[CVT2_LO]]:[[CVT3_HI]]{{\]}}
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
388 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
389 define amdgpu_kernel void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
390 %val = load <2 x half>, <2 x half> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
391 %cvt = fpext <2 x half> %val to <2 x double>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
392 store <2 x double> %cvt, <2 x double> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
393 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
394 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
395
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
396 ; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:
100
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
397
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
398 ; XSI: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
399 ; XSI: v_cvt_f32_f16_e32
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
400 ; XSI: v_cvt_f32_f16_e32
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
401 ; XSI-DAG: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
402 ; XSI: v_cvt_f32_f16_e32
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
403 ; XSI-NOT: v_cvt_f32_f16
100
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
404
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
405 ; XVI: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
406 ; XVI: v_cvt_f32_f16_e32
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
407 ; XVI: v_cvt_f32_f16_e32
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
408 ; XVI: v_cvt_f32_f16_sdwa
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
409 ; XVI-NOT: v_cvt_f32_f16
100
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
410
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
411 ; GCN: buffer_load_dwordx2 v{{\[}}[[IN_LO:[0-9]+]]:[[IN_HI:[0-9]+]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
412 ; GCN-DAG: v_cvt_f32_f16_e32 [[Z32:v[0-9]+]], v[[IN_HI]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
413 ; GCN-DAG: v_cvt_f32_f16_e32 [[X32:v[0-9]+]], v[[IN_LO]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
414 ; SI: v_lshrrev_b32_e32 [[Y16:v[0-9]+]], 16, v[[IN_LO]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
415 ; SI-DAG: v_cvt_f32_f16_e32 [[Y32:v[0-9]+]], [[Y16]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
416 ; VI-DAG: v_cvt_f32_f16_sdwa [[Y32:v[0-9]+]], v[[IN_LO]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
417
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
418 ; GCN-DAG: v_cvt_f64_f32_e32 [[Z:v\[[0-9]+:[0-9]+\]]], [[Z32]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
419 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[XLO:[0-9]+]]:{{[0-9]+}}], [[X32]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
420 ; GCN-DAG: v_cvt_f64_f32_e32 v[{{[0-9]+}}:[[YHI:[0-9]+]]{{\]}}, [[Y32]]
100
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
421 ; GCN-NOT: v_cvt_f64_f32_e32
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
422
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
423 ; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[XLO]]:[[YHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
424 ; GCN-DAG: buffer_store_dwordx2 [[Z]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
100
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
425 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
426 define amdgpu_kernel void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
427 %val = load <3 x half>, <3 x half> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
428 %cvt = fpext <3 x half> %val to <3 x double>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
429 store <3 x double> %cvt, <3 x double> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
430 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
431 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
432
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
433 ; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f64:
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
434 define amdgpu_kernel void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
435 %val = load <4 x half>, <4 x half> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
436 %cvt = fpext <4 x half> %val to <4 x double>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
437 store <4 x double> %cvt, <4 x double> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
438 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
439 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
440
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
441 ; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f64:
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
442 define amdgpu_kernel void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
443 %val = load <8 x half>, <8 x half> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
444 %cvt = fpext <8 x half> %val to <8 x double>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
445 store <8 x double> %cvt, <8 x double> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
446 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
447 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
448
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
449 ; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f64:
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
450 define amdgpu_kernel void @global_extload_v16f16_to_v16f64(<16 x double> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
451 %val = load <16 x half>, <16 x half> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
452 %cvt = fpext <16 x half> %val to <16 x double>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
453 store <16 x double> %cvt, <16 x double> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
454 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
455 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
456
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
457 ; GCN-LABEL: {{^}}global_truncstore_f32_to_f16:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
458 ; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
459 ; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[LOAD]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
460 ; GCN: buffer_store_short [[CVT]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
461 define amdgpu_kernel void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
462 %val = load float, float addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
463 %cvt = fptrunc float %val to half
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
464 store half %cvt, half addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
465 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
466 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
467
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
468 ; GCN-LABEL: {{^}}global_truncstore_v2f32_to_v2f16:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
469 ; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
470 ; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
471
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
472 ; SI-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
473 ; SI-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, [[CVT1]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
474 ; SI: v_or_b32_e32 [[PACKED:v[0-9]+]], [[CVT0]], [[SHL]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
475
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
476 ; VI-DAG: v_cvt_f16_f32_sdwa [[CVT1:v[0-9]+]], v[[HI]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
477 ; VI: v_or_b32_e32 [[PACKED:v[0-9]+]], [[CVT0]], [[CVT1]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
478
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
479 ; GCN-DAG: buffer_store_dword [[PACKED]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
480 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
481 define amdgpu_kernel void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
482 %val = load <2 x float>, <2 x float> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
483 %cvt = fptrunc <2 x float> %val to <2 x half>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
484 store <2 x half> %cvt, <2 x half> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
485 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
486 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
487
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
488 ; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
489 ; GCN: buffer_load_dwordx4
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
490 ; GCN-DAG: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
491 ; SI-DAG: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
492 ; VI-DAG: v_cvt_f16_f32_sdwa
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
493 ; GCN-DAG: v_cvt_f16_f32_e32
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
494 ; GCN: buffer_store_short
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
495 ; GCN: buffer_store_dword
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
496 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
497 define amdgpu_kernel void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
498 %val = load <3 x float>, <3 x float> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
499 %cvt = fptrunc <3 x float> %val to <3 x half>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
500 store <3 x half> %cvt, <3 x half> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
501 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
502 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
503
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
504 ; GCN-LABEL: {{^}}global_truncstore_v4f32_to_v4f16:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
505 ; GCN: buffer_load_dwordx4
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
506 ; GCN-DAG: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
507 ; SI-DAG: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
508 ; SI-DAG: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
509 ; VI-DAG: v_cvt_f16_f32_sdwa
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
510 ; VI-DAG: v_cvt_f16_f32_sdwa
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
511 ; GCN-DAG: v_cvt_f16_f32_e32
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
512 ; GCN: buffer_store_dwordx2
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
513 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
514 define amdgpu_kernel void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
515 %val = load <4 x float>, <4 x float> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
516 %cvt = fptrunc <4 x float> %val to <4 x half>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
517 store <4 x half> %cvt, <4 x half> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
518 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
519 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
520
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
521 ; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16:
100
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
522 ; GCN: buffer_load_dwordx4
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
523 ; GCN: buffer_load_dwordx4
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
524 ; SI: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
525 ; SI: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
526 ; SI: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
527 ; SI: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
528 ; SI: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
529 ; SI: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
530 ; SI: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
531 ; SI: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
532 ; VI-DAG: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
533 ; VI-DAG: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
534 ; VI-DAG: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
535 ; VI-DAG: v_cvt_f16_f32_e32
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
536 ; VI-DAG: v_cvt_f16_f32_sdwa
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
537 ; VI-DAG: v_cvt_f16_f32_sdwa
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
538 ; VI-DAG: v_cvt_f16_f32_sdwa
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
539 ; VI-DAG: v_cvt_f16_f32_sdwa
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
540 ; GCN: buffer_store_dwordx4
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
541 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
542 define amdgpu_kernel void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
543 %val = load <8 x float>, <8 x float> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
544 %cvt = fptrunc <8 x float> %val to <8 x half>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
545 store <8 x half> %cvt, <8 x half> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
546 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
547 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
548
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
549 ; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16:
100
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
550 ; GCN: buffer_load_dwordx4
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
551 ; GCN: buffer_load_dwordx4
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
552 ; GCN: buffer_load_dwordx4
7d135dc70f03 LLVM 3.9
Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp>
parents: 95
diff changeset
553 ; GCN: buffer_load_dwordx4
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
554 ; GCN-DAG: v_cvt_f16_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
555 ; GCN-DAG: v_cvt_f16_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
556 ; GCN-DAG: v_cvt_f16_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
557 ; GCN-DAG: v_cvt_f16_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
558 ; GCN-DAG: v_cvt_f16_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
559 ; GCN-DAG: v_cvt_f16_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
560 ; GCN-DAG: v_cvt_f16_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
561 ; GCN-DAG: v_cvt_f16_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
562 ; GCN-DAG: v_cvt_f16_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
563 ; GCN-DAG: v_cvt_f16_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
564 ; GCN-DAG: v_cvt_f16_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
565 ; GCN-DAG: v_cvt_f16_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
566 ; GCN-DAG: v_cvt_f16_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
567 ; GCN-DAG: v_cvt_f16_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
568 ; GCN-DAG: v_cvt_f16_f32_e32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
569 ; GCN-DAG: v_cvt_f16_f32_e32
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
570 ; GCN-DAG: buffer_store_dwordx4
1172e4bd9c6f update 4.0.0
mir3636
parents: 100
diff changeset
571 ; GCN-DAG: buffer_store_dwordx4
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
572 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
573 define amdgpu_kernel void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
574 %val = load <16 x float>, <16 x float> addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
575 %cvt = fptrunc <16 x float> %val to <16 x half>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
576 store <16 x half> %cvt, <16 x half> addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
577 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
578 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
579
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
580 ; FIXME: Unsafe math should fold conversions away
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
581 ; GCN-LABEL: {{^}}fadd_f16:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
582 ; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
583 ; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
584 ; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
585 ; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
586 ; SI: v_add_f32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
587 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
588 define amdgpu_kernel void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
589 %add = fadd half %a, %b
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
590 store half %add, half addrspace(1)* %out, align 4
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
591 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
592 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
593
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
594 ; GCN-LABEL: {{^}}fadd_v2f16:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
595 ; SI: v_add_f32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
596 ; SI: v_add_f32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
597 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
598 define amdgpu_kernel void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
599 %add = fadd <2 x half> %a, %b
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
600 store <2 x half> %add, <2 x half> addrspace(1)* %out, align 8
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
601 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
602 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
603
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
604 ; GCN-LABEL: {{^}}fadd_v4f16:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
605 ; SI: v_add_f32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
606 ; SI: v_add_f32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
607 ; SI: v_add_f32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
608 ; SI: v_add_f32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
609 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
610 define amdgpu_kernel void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
611 %b_ptr = getelementptr <4 x half>, <4 x half> addrspace(1)* %in, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
612 %a = load <4 x half>, <4 x half> addrspace(1)* %in, align 16
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
613 %b = load <4 x half>, <4 x half> addrspace(1)* %b_ptr, align 16
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
614 %result = fadd <4 x half> %a, %b
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
615 store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
616 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
617 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
618
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
619 ; GCN-LABEL: {{^}}fadd_v8f16:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
620 ; SI: v_add_f32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
621 ; SI: v_add_f32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
622 ; SI: v_add_f32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
623 ; SI: v_add_f32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
624 ; SI: v_add_f32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
625 ; SI: v_add_f32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
626 ; SI: v_add_f32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
627 ; SI: v_add_f32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
628 ; GCN: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
629 define amdgpu_kernel void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half> %a, <8 x half> %b) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
630 %add = fadd <8 x half> %a, %b
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
631 store <8 x half> %add, <8 x half> addrspace(1)* %out, align 32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
632 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
633 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
634
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
635 ; GCN-LABEL: {{^}}test_bitcast_from_half:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
636 ; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
637 ; GCN: buffer_store_short [[TMP]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
638 define amdgpu_kernel void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
639 %val = load half, half addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
640 %val_int = bitcast half %val to i16
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
641 store i16 %val_int, i16 addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
642 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
643 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
644
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
645 ; GCN-LABEL: {{^}}test_bitcast_to_half:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
646 ; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
647 ; GCN: buffer_store_short [[TMP]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
648 define amdgpu_kernel void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
649 %val = load i16, i16 addrspace(1)* %in
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
650 %val_fp = bitcast i16 %val to half
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
651 store half %val_fp, half addrspace(1)* %out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
652 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
653 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
654
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
655 attributes #0 = { nounwind }