annotate llvm/test/CodeGen/AMDGPU/addrspacecast.ll @ 252:1f2b6ac9f198 llvm-original

LLVM16-1
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Fri, 18 Aug 2023 09:04:13 +0900
parents c4bab56944e8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
1 ; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
2 ; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s
150
anatofuz
parents:
diff changeset
3
anatofuz
parents:
diff changeset
4 ; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
anatofuz
parents:
diff changeset
5 ; HSA: enable_sgpr_private_segment_buffer = 1
anatofuz
parents:
diff changeset
6 ; HSA: enable_sgpr_dispatch_ptr = 0
anatofuz
parents:
diff changeset
7 ; CI: enable_sgpr_queue_ptr = 1
anatofuz
parents:
diff changeset
8 ; GFX9: enable_sgpr_queue_ptr = 0
anatofuz
parents:
diff changeset
9
anatofuz
parents:
diff changeset
10 ; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
anatofuz
parents:
diff changeset
11 ; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
12 ; CI-DAG: s_cmp_lg_u32 [[PTR]], -1
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
13 ; CI-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[APERTURE]], 0
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
14 ; CI-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0
150
anatofuz
parents:
diff changeset
15
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
16 ; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_shared_base
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
17
150
anatofuz
parents:
diff changeset
18 ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
anatofuz
parents:
diff changeset
19 ; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
anatofuz
parents:
diff changeset
20
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
21 ; GFX9: s_cmp_lg_u32 [[PTR]], -1
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
22 ; GFX9-DAG: s_cselect_b32 s[[LO:[0-9]+]], s[[HIBASE]], 0
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
23 ; GFX9-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[PTR]], 0
150
anatofuz
parents:
diff changeset
24
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
25 ; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
150
anatofuz
parents:
diff changeset
26
anatofuz
parents:
diff changeset
27 ; At most 2 digits. Make sure src_shared_base is not counted as a high
anatofuz
parents:
diff changeset
28 ; number SGPR.
anatofuz
parents:
diff changeset
29
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
30 ; HSA: NumSgprs: {{[0-9]+}}
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
31 define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) #0 {
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
32 %stof = addrspacecast ptr addrspace(3) %ptr to ptr
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
33 store volatile i32 7, ptr %stof
150
anatofuz
parents:
diff changeset
34 ret void
anatofuz
parents:
diff changeset
35 }
anatofuz
parents:
diff changeset
36
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
37 ; Test handling inside a non-kernel
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
38 ; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast_func:
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
39 ; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[6:7], 0x10{{$}}
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
40 ; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
41 ; CI-DAG: v_cmp_ne_u32_e32 vcc, -1, v0
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
42 ; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
43 ; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, v0
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
44
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
45 ; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_shared_base
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
46
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
47 ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
48
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
49 ; GFX9-DAG: v_mov_b32_e32 v[[VREG_HIBASE:[0-9]+]], s[[HIBASE]]
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
50 ; GFX9-DAG: v_cmp_ne_u32_e32 vcc, -1, v0
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
51 ; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, v0, vcc
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
52 ; GFX9-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, v[[VREG_HIBASE]], vcc
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
53
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
54 ; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
55 define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 {
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
56 %stof = addrspacecast ptr addrspace(3) %ptr to ptr
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
57 store volatile i32 7, ptr %stof
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
58 ret void
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
59 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
60
150
anatofuz
parents:
diff changeset
61 ; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast:
anatofuz
parents:
diff changeset
62 ; HSA: enable_sgpr_private_segment_buffer = 1
anatofuz
parents:
diff changeset
63 ; HSA: enable_sgpr_dispatch_ptr = 0
anatofuz
parents:
diff changeset
64 ; CI: enable_sgpr_queue_ptr = 1
anatofuz
parents:
diff changeset
65 ; GFX9: enable_sgpr_queue_ptr = 0
anatofuz
parents:
diff changeset
66
anatofuz
parents:
diff changeset
67 ; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
anatofuz
parents:
diff changeset
68 ; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
anatofuz
parents:
diff changeset
69
anatofuz
parents:
diff changeset
70 ; CI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
71 ; CI-DAG: s_cmp_lg_u32 [[PTR]], -1
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
72 ; CI-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[APERTURE]], 0
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
73 ; CI-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0
150
anatofuz
parents:
diff changeset
74
anatofuz
parents:
diff changeset
75 ; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
76 ; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_private_base
150
anatofuz
parents:
diff changeset
77
anatofuz
parents:
diff changeset
78 ; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
79 ; GFX9: s_cmp_lg_u32 [[PTR]], -1
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
80 ; GFX9: s_cselect_b32 s[[LO:[0-9]+]], s[[HIBASE]], 0
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
81 ; GFX9: s_cselect_b32 s[[HI:[0-9]+]], [[PTR]], 0
150
anatofuz
parents:
diff changeset
82
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
83 ; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
150
anatofuz
parents:
diff changeset
84
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
85 ; HSA: NumSgprs: {{[0-9]+}}
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
86 define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 {
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
87 %stof = addrspacecast ptr addrspace(5) %ptr to ptr
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
88 store volatile i32 7, ptr %stof
150
anatofuz
parents:
diff changeset
89 ret void
anatofuz
parents:
diff changeset
90 }
anatofuz
parents:
diff changeset
91
anatofuz
parents:
diff changeset
92 ; no-op
anatofuz
parents:
diff changeset
93 ; HSA-LABEL: {{^}}use_global_to_flat_addrspacecast:
anatofuz
parents:
diff changeset
94 ; HSA: enable_sgpr_queue_ptr = 0
anatofuz
parents:
diff changeset
95
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
96 ; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
150
anatofuz
parents:
diff changeset
97 ; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
anatofuz
parents:
diff changeset
98 ; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
anatofuz
parents:
diff changeset
99 ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
100 ; HSA: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]]
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
101 define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #0 {
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
102 %stof = addrspacecast ptr addrspace(1) %ptr to ptr
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
103 store volatile i32 7, ptr %stof
150
anatofuz
parents:
diff changeset
104 ret void
anatofuz
parents:
diff changeset
105 }
anatofuz
parents:
diff changeset
106
anatofuz
parents:
diff changeset
107 ; no-op
anatofuz
parents:
diff changeset
108 ; HSA-LABEL: {{^}}use_constant_to_flat_addrspacecast:
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
109 ; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
150
anatofuz
parents:
diff changeset
110 ; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
anatofuz
parents:
diff changeset
111 ; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
112 ; HSA: flat_load_dword v{{[0-9]+}}, v[[[VPTRLO]]:[[VPTRHI]]]
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
113 define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #0 {
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
114 %stof = addrspacecast ptr addrspace(4) %ptr to ptr
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
115 %ld = load volatile i32, ptr %stof
150
anatofuz
parents:
diff changeset
116 ret void
anatofuz
parents:
diff changeset
117 }
anatofuz
parents:
diff changeset
118
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
119 ; HSA-LABEL: {{^}}use_constant_to_global_addrspacecast:
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
120 ; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
121 ; CI-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
122 ; CI-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
123 ; CI: {{flat|global}}_load_dword v{{[0-9]+}}, v[[[VPTRLO]]:[[VPTRHI]]]
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
124
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
125 ; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
126 ; GFX9: global_load_dword v{{[0-9]+}}, [[ZERO]], s[[[PTRLO]]:[[PTRHI]]]
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
127 define amdgpu_kernel void @use_constant_to_global_addrspacecast(ptr addrspace(4) %ptr) #0 {
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
128 %stof = addrspacecast ptr addrspace(4) %ptr to ptr addrspace(1)
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
129 %ld = load volatile i32, ptr addrspace(1) %stof
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
130 ret void
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
131 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
132
150
anatofuz
parents:
diff changeset
133 ; HSA-LABEL: {{^}}use_flat_to_group_addrspacecast:
anatofuz
parents:
diff changeset
134 ; HSA: enable_sgpr_private_segment_buffer = 1
anatofuz
parents:
diff changeset
135 ; HSA: enable_sgpr_dispatch_ptr = 0
anatofuz
parents:
diff changeset
136 ; HSA: enable_sgpr_queue_ptr = 0
anatofuz
parents:
diff changeset
137
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
138 ; HSA: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]]
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
139 ; CI-DAG: v_cmp_ne_u64_e64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}}
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
140 ; CI-DAG: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s[[[CMP_LO]]:[[CMP_HI]]], exec
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
141 ; CI-DAG: s_cselect_b32 [[CASTPTR:s[0-9]+]], s[[PTR_LO]], -1
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
142 ; CI-DAG: v_mov_b32_e32 [[VCASTPTR:v[0-9]+]], [[CASTPTR]]
150
anatofuz
parents:
diff changeset
143 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
144 ; GFX9-DAG: s_cmp_lg_u64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], 0
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
145 ; GFX9-DAG: s_cselect_b32 s[[PTR_LO]], s[[PTR_LO]], -1
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
146 ; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]]
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
147 ; CI-DAG: ds_write_b32 [[VCASTPTR]], v[[K]]
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
148 ; GFX9-DAG: ds_write_b32 [[CASTPTR]], v[[K]]
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
149 define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #0 {
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
150 %ftos = addrspacecast ptr %ptr to ptr addrspace(3)
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
151 store volatile i32 0, ptr addrspace(3) %ftos
150
anatofuz
parents:
diff changeset
152 ret void
anatofuz
parents:
diff changeset
153 }
anatofuz
parents:
diff changeset
154
anatofuz
parents:
diff changeset
155 ; HSA-LABEL: {{^}}use_flat_to_private_addrspacecast:
anatofuz
parents:
diff changeset
156 ; HSA: enable_sgpr_private_segment_buffer = 1
anatofuz
parents:
diff changeset
157 ; HSA: enable_sgpr_dispatch_ptr = 0
anatofuz
parents:
diff changeset
158 ; HSA: enable_sgpr_queue_ptr = 0
anatofuz
parents:
diff changeset
159
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
160 ; HSA: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]]
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
161 ; CI-DAG v_cmp_ne_u64_e64 vcc, s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}}
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
162 ; CI-DAG v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
163 ; CI-DAG v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
164 ; CI-DAG: v_cmp_ne_u64_e64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}}
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
165 ; CI-DAG: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s[[[CMP_LO]]:[[CMP_HI]]], exec
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
166 ; CI-DAG: s_cselect_b32 [[CASTPTR:s[0-9]+]], s[[PTR_LO]], -1
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
167 ; CI-DAG: v_mov_b32_e32 [[VCASTPTR:v[0-9]+]], [[CASTPTR]]
150
anatofuz
parents:
diff changeset
168 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
169 ; GFX9-DAG: s_cmp_lg_u64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], 0
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
170 ; GFX9-DAG: s_cselect_b32 s[[PTR_LO]], s[[PTR_LO]], -1
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
171 ; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]]
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
172 ; CI: buffer_store_dword v[[K]], [[VCASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
173 ; GFX9: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
174 define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #0 {
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
175 %ftos = addrspacecast ptr %ptr to ptr addrspace(5)
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
176 store volatile i32 0, ptr addrspace(5) %ftos
150
anatofuz
parents:
diff changeset
177 ret void
anatofuz
parents:
diff changeset
178 }
anatofuz
parents:
diff changeset
179
anatofuz
parents:
diff changeset
180 ; HSA-LABEL: {{^}}use_flat_to_global_addrspacecast:
anatofuz
parents:
diff changeset
181 ; HSA: enable_sgpr_queue_ptr = 0
anatofuz
parents:
diff changeset
182
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
183 ; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]], s[4:5], 0x0
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
184 ; CI-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
185 ; CI-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
186 ; CI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
187 ; CI: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]]
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
188
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
189 ; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
190 ; GFX9: global_store_dword [[ZERO]], [[ZERO]], s[[[PTRLO]]:[[PTRHI]]{{\]$}}
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
191 define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) #0 {
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
192 %ftos = addrspacecast ptr %ptr to ptr addrspace(1)
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
193 store volatile i32 0, ptr addrspace(1) %ftos
150
anatofuz
parents:
diff changeset
194 ret void
anatofuz
parents:
diff changeset
195 }
anatofuz
parents:
diff changeset
196
anatofuz
parents:
diff changeset
197 ; HSA-LABEL: {{^}}use_flat_to_constant_addrspacecast:
anatofuz
parents:
diff changeset
198 ; HSA: enable_sgpr_queue_ptr = 0
anatofuz
parents:
diff changeset
199
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
200 ; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]], s[4:5], 0x0
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
201 ; HSA: s_load_dword s{{[0-9]+}}, s[[[PTRLO]]:[[PTRHI]]], 0x0
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
202 define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
203 %ftos = addrspacecast ptr %ptr to ptr addrspace(4)
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
204 load volatile i32, ptr addrspace(4) %ftos
150
anatofuz
parents:
diff changeset
205 ret void
anatofuz
parents:
diff changeset
206 }
anatofuz
parents:
diff changeset
207
anatofuz
parents:
diff changeset
208 ; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
anatofuz
parents:
diff changeset
209 ; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
anatofuz
parents:
diff changeset
210 ; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
anatofuz
parents:
diff changeset
211
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
212 ; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
150
anatofuz
parents:
diff changeset
213
anatofuz
parents:
diff changeset
214 ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
215 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
216 ; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
150
anatofuz
parents:
diff changeset
217 define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
218 %cast = addrspacecast ptr addrspace(3) null to ptr
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
219 store volatile i32 7, ptr %cast
150
anatofuz
parents:
diff changeset
220 ret void
anatofuz
parents:
diff changeset
221 }
anatofuz
parents:
diff changeset
222
anatofuz
parents:
diff changeset
223 ; HSA-LABEL: {{^}}cast_0_flat_to_group_addrspacecast:
anatofuz
parents:
diff changeset
224 ; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
anatofuz
parents:
diff changeset
225 ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
anatofuz
parents:
diff changeset
226 ; HSA: ds_write_b32 [[PTR]], [[K]]
anatofuz
parents:
diff changeset
227 define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
228 %cast = addrspacecast ptr null to ptr addrspace(3)
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
229 store volatile i32 7, ptr addrspace(3) %cast
150
anatofuz
parents:
diff changeset
230 ret void
anatofuz
parents:
diff changeset
231 }
anatofuz
parents:
diff changeset
232
anatofuz
parents:
diff changeset
233 ; HSA-LABEL: {{^}}cast_neg1_group_to_flat_addrspacecast:
anatofuz
parents:
diff changeset
234 ; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
223
5f17cb93ff66 LLVM13 (2021/7/18)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 221
diff changeset
235 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
5f17cb93ff66 LLVM13 (2021/7/18)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 221
diff changeset
236 ; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
237 ; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
150
anatofuz
parents:
diff changeset
238 define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
239 %cast = addrspacecast ptr addrspace(3) inttoptr (i32 -1 to ptr addrspace(3)) to ptr
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
240 store volatile i32 7, ptr %cast
150
anatofuz
parents:
diff changeset
241 ret void
anatofuz
parents:
diff changeset
242 }
anatofuz
parents:
diff changeset
243
anatofuz
parents:
diff changeset
244 ; HSA-LABEL: {{^}}cast_neg1_flat_to_group_addrspacecast:
anatofuz
parents:
diff changeset
245 ; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
anatofuz
parents:
diff changeset
246 ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
anatofuz
parents:
diff changeset
247 ; HSA: ds_write_b32 [[PTR]], [[K]]
anatofuz
parents:
diff changeset
248 define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
249 %cast = addrspacecast ptr inttoptr (i64 -1 to ptr) to ptr addrspace(3)
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
250 store volatile i32 7, ptr addrspace(3) %cast
150
anatofuz
parents:
diff changeset
251 ret void
anatofuz
parents:
diff changeset
252 }
anatofuz
parents:
diff changeset
253
anatofuz
parents:
diff changeset
254 ; FIXME: Shouldn't need to enable queue ptr
anatofuz
parents:
diff changeset
255 ; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
256 ; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
257 ; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
258
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
259 ; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_private_base
150
anatofuz
parents:
diff changeset
260
anatofuz
parents:
diff changeset
261 ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
262 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
263 ; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
150
anatofuz
parents:
diff changeset
264 define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
265 %cast = addrspacecast ptr addrspace(5) null to ptr
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
266 store volatile i32 7, ptr %cast
150
anatofuz
parents:
diff changeset
267 ret void
anatofuz
parents:
diff changeset
268 }
anatofuz
parents:
diff changeset
269
anatofuz
parents:
diff changeset
270 ; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast:
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
271 ; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
272 ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
273 ; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0
150
anatofuz
parents:
diff changeset
274 define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
275 %cast = addrspacecast ptr null to ptr addrspace(5)
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
276 store volatile i32 7, ptr addrspace(5) %cast
150
anatofuz
parents:
diff changeset
277 ret void
anatofuz
parents:
diff changeset
278 }
anatofuz
parents:
diff changeset
279
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
280
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
281 ; HSA-LABEL: {{^}}cast_neg1_private_to_flat_addrspacecast:
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
282 ; CI: enable_sgpr_queue_ptr = 1
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
283 ; GFX9: enable_sgpr_queue_ptr = 0
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
284
223
5f17cb93ff66 LLVM13 (2021/7/18)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 221
diff changeset
285 ; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
286 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
223
5f17cb93ff66 LLVM13 (2021/7/18)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 221
diff changeset
287 ; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
288 ; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
289 define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 {
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
290 %cast = addrspacecast ptr addrspace(5) inttoptr (i32 -1 to ptr addrspace(5)) to ptr
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
291 store volatile i32 7, ptr %cast
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
292 ret void
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
293 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
294
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
295 ; HSA-LABEL: {{^}}cast_neg1_flat_to_private_addrspacecast:
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
296 ; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
297 ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
298 ; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
299 define amdgpu_kernel void @cast_neg1_flat_to_private_addrspacecast() #0 {
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
300 %cast = addrspacecast ptr inttoptr (i64 -1 to ptr) to ptr addrspace(5)
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
301 store volatile i32 7, ptr addrspace(5) %cast
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
302 ret void
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
303 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
304
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
305
150
anatofuz
parents:
diff changeset
306 ; Disable optimizations in case there are optimizations added that
anatofuz
parents:
diff changeset
307 ; specialize away generic pointer accesses.
anatofuz
parents:
diff changeset
308
anatofuz
parents:
diff changeset
309 ; HSA-LABEL: {{^}}branch_use_flat_i32:
anatofuz
parents:
diff changeset
310 ; HSA: {{flat|global}}_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}
anatofuz
parents:
diff changeset
311 ; HSA: s_endpgm
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
312 define amdgpu_kernel void @branch_use_flat_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 %x, i32 %c) #0 {
150
anatofuz
parents:
diff changeset
313 entry:
anatofuz
parents:
diff changeset
314 %cmp = icmp ne i32 %c, 0
anatofuz
parents:
diff changeset
315 br i1 %cmp, label %local, label %global
anatofuz
parents:
diff changeset
316
anatofuz
parents:
diff changeset
317 local:
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
318 %flat_local = addrspacecast ptr addrspace(3) %lptr to ptr
150
anatofuz
parents:
diff changeset
319 br label %end
anatofuz
parents:
diff changeset
320
anatofuz
parents:
diff changeset
321 global:
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
322 %flat_global = addrspacecast ptr addrspace(1) %gptr to ptr
150
anatofuz
parents:
diff changeset
323 br label %end
anatofuz
parents:
diff changeset
324
anatofuz
parents:
diff changeset
325 end:
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
326 %fptr = phi ptr [ %flat_local, %local ], [ %flat_global, %global ]
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
327 store volatile i32 %x, ptr %fptr, align 4
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
328 ; %val = load i32, ptr %fptr, align 4
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
329 ; store i32 %val, ptr addrspace(1) %out, align 4
150
anatofuz
parents:
diff changeset
330 ret void
anatofuz
parents:
diff changeset
331 }
anatofuz
parents:
diff changeset
332
anatofuz
parents:
diff changeset
333 ; Check for prologue initializing special SGPRs pointing to scratch.
anatofuz
parents:
diff changeset
334 ; HSA-LABEL: {{^}}store_flat_scratch:
anatofuz
parents:
diff changeset
335 ; CI-DAG: s_mov_b32 flat_scratch_lo, s9
223
5f17cb93ff66 LLVM13 (2021/7/18)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 221
diff changeset
336 ; CI-DAG: s_add_i32 [[ADD:s[0-9]+]], s8, s11
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
337 ; CI-DAG: s_lshr_b32 flat_scratch_hi, [[ADD]], 8
150
anatofuz
parents:
diff changeset
338
anatofuz
parents:
diff changeset
339 ; GFX9: s_add_u32 flat_scratch_lo, s6, s9
anatofuz
parents:
diff changeset
340 ; GFX9: s_addc_u32 flat_scratch_hi, s7, 0
anatofuz
parents:
diff changeset
341
anatofuz
parents:
diff changeset
342 ; HSA: {{flat|global}}_store_dword
anatofuz
parents:
diff changeset
343 ; HSA: s_barrier
anatofuz
parents:
diff changeset
344 ; HSA: {{flat|global}}_load_dword
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
345 define amdgpu_kernel void @store_flat_scratch(ptr addrspace(1) noalias %out, i32) #0 {
150
anatofuz
parents:
diff changeset
346 %alloca = alloca i32, i32 9, align 4, addrspace(5)
anatofuz
parents:
diff changeset
347 %x = call i32 @llvm.amdgcn.workitem.id.x() #2
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
348 %pptr = getelementptr i32, ptr addrspace(5) %alloca, i32 %x
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
349 %fptr = addrspacecast ptr addrspace(5) %pptr to ptr
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
350 store volatile i32 %x, ptr %fptr
150
anatofuz
parents:
diff changeset
351 ; Dummy call
anatofuz
parents:
diff changeset
352 call void @llvm.amdgcn.s.barrier() #1
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
353 %reload = load volatile i32, ptr %fptr, align 4
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
354 store volatile i32 %reload, ptr addrspace(1) %out, align 4
150
anatofuz
parents:
diff changeset
355 ret void
anatofuz
parents:
diff changeset
356 }
anatofuz
parents:
diff changeset
357
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
358 ; HSA-LABEL: {{^}}use_constant_to_constant32_addrspacecast:
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
359 ; GFX9: s_load_dwordx2 [[PTRPTR:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0{{$}}
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
360 ; GFX9: s_load_dword [[OFFSET:s[0-9]+]], s[4:5], 0x8{{$}}
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
361 ; GFX9: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]], [[PTRPTR]], 0x0{{$}}
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
362 ; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}}
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
363 ; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]]
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
364 ; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}}
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
365 define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(ptr addrspace(4) %ptr.ptr, i32 %offset) #0 {
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
366 %ptr = load volatile ptr addrspace(4), ptr addrspace(4) %ptr.ptr
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
367 %addrspacecast = addrspacecast ptr addrspace(4) %ptr to ptr addrspace(6)
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
368 %gep = getelementptr i8, ptr addrspace(6) %addrspacecast, i32 %offset
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
369 %load = load volatile i32, ptr addrspace(6) %gep, align 4
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
370 ret void
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
371 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
372
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
373 ; HSA-LABEL: {{^}}use_global_to_constant32_addrspacecast:
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
374 ; GFX9: s_load_dwordx2 [[PTRPTR:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0{{$}}
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
375 ; GFX9: s_load_dword [[OFFSET:s[0-9]+]], s[4:5], 0x8{{$}}
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
376 ; GFX9: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]], [[PTRPTR]], 0x0{{$}}
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
377 ; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}}
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
378 ; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]]
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
379 ; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}}
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
380 define amdgpu_kernel void @use_global_to_constant32_addrspacecast(ptr addrspace(4) %ptr.ptr, i32 %offset) #0 {
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
381 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) %ptr.ptr
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
382 %addrspacecast = addrspacecast ptr addrspace(1) %ptr to ptr addrspace(6)
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
383 %gep = getelementptr i8, ptr addrspace(6) %addrspacecast, i32 %offset
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
384 %load = load volatile i32, ptr addrspace(6) %gep, align 4
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
385 ret void
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
386 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
387
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
388 ; HSA-LABEL: {{^}}use_constant32bit_to_flat_addrspacecast_0:
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
389 ; HSA: s_load_dword [[PTR:s[0-9]+]],
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
390 ; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
391 ; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]]
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
392 ; HSA: flat_load_dword v{{[0-9]+}}, v[[[LO]]:[[HI]]]
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
393 define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_0(ptr addrspace(6) %ptr) #0 {
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
394 %stof = addrspacecast ptr addrspace(6) %ptr to ptr
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
395 %load = load volatile i32, ptr %stof
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
396 ret void
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
397 }
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
398
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
399 ; HSA-LABEL: {{^}}use_constant32bit_to_flat_addrspacecast_1:
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
400 ; HSA: s_load_dword [[PTR:s[0-9]+]],
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
401 ; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0xffff8000
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
402 ; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]]
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
403 ; HSA: flat_load_dword v{{[0-9]+}}, v[[[LO]]:[[HI]]]
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
404 define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_1(ptr addrspace(6) %ptr) #3 {
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
405 %stof = addrspacecast ptr addrspace(6) %ptr to ptr
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
406 %load = load volatile i32, ptr %stof
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
407 ret void
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
408 }
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
409
150
anatofuz
parents:
diff changeset
410 declare void @llvm.amdgcn.s.barrier() #1
anatofuz
parents:
diff changeset
411 declare i32 @llvm.amdgcn.workitem.id.x() #2
anatofuz
parents:
diff changeset
412
anatofuz
parents:
diff changeset
413 attributes #0 = { nounwind }
anatofuz
parents:
diff changeset
414 attributes #1 = { nounwind convergent }
anatofuz
parents:
diff changeset
415 attributes #2 = { nounwind readnone }
236
c4bab56944e8 LLVM 16
kono
parents: 223
diff changeset
416 attributes #3 = { nounwind "amdgpu-32bit-address-high-bits"="0xffff8000" }
252
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
417
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
418 !llvm.module.flags = !{!0}
1f2b6ac9f198 LLVM16-1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 236
diff changeset
419 !0 = !{i32 1, !"amdgpu_code_object_version", i32 200}