; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-code-object-v3,-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-code-object-v3,-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s

; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0

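; The checks below pin down the expected lowering of a group->flat cast
; (a sketch of the pattern, not a literal ISA listing):
;   hi = (src == -1) ? 0 : shared_aperture_base
;   lo = (src == -1) ? 0 : src
; On CI the aperture base is loaded from the queue pointer (hence
; enable_sgpr_queue_ptr = 1 above); on GFX9 it is read from the
; HW_REG_SH_MEM_BASES hardware register, so no queue pointer is needed.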
; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; CI-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
; CI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]

; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_SHARED_BASE]]

; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base
; GFX9: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
; GFX9: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
; GFX9-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]

; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]

; At most 2 digits. Make sure src_shared_base is not counted as a high
; number SGPR.

; CI: NumSgprs: {{[0-9][0-9]+}}
; GFX9: NumSgprs: {{[0-9]+}}
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
  store volatile i32 7, i32* %stof
  ret void
}

173
|
43 ; Test handling inside a non-kernel
|
|
44 ; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast_func:
|
|
45 ; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
|
|
46 ; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
|
|
47 ; CI-DAG: v_cmp_ne_u32_e32 vcc, -1, v0
|
|
48 ; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
|
|
49 ; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, v0
|
|
50
|
|
51 ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
|
|
52 ; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16)
|
|
53 ; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16
|
|
54 ; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_SHARED_BASE]]
|
|
55
|
|
56 ; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base
|
|
57 ; GFX9-DAG: v_cmp_ne_u32_e32 vcc, -1, v0
|
|
58 ; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, v0, vcc
|
|
59 ; GFX9-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
|
|
60
|
|
61 ; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]
|
|
62 define void @use_group_to_flat_addrspacecast_func(i32 addrspace(3)* %ptr) #0 {
|
|
63 %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
|
|
64 store volatile i32 7, i32* %stof
|
|
65 ret void
|
|
66 }

; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0

; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]

; CI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; CI-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], 0
; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
; CI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]

; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
; GFX9-DAG: s_getreg_b32 [[SSRC_PRIVATE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9-DAG: s_lshl_b32 [[SSRC_PRIVATE_BASE:s[0-9]+]], [[SSRC_PRIVATE]], 16
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_PRIVATE_BASE]]

; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base

; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; GFX9: v_cmp_ne_u32_e64 vcc, [[PTR]], 0
; GFX9: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
; GFX9: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]

; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]

; CI: NumSgprs: {{[0-9][0-9]+}}
; GFX9: NumSgprs: {{[0-9]+}}
define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
  store volatile i32 7, i32* %stof
  ret void
}

; no-op
; HSA-LABEL: {{^}}use_global_to_flat_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
  store volatile i32 7, i32* %stof
  ret void
}

; no-op
; HSA-LABEL: {{^}}use_constant_to_flat_addrspacecast:
; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA: flat_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(4)* %ptr to i32*
  %ld = load volatile i32, i32* %stof
  ret void
}

; HSA-LABEL: {{^}}use_constant_to_global_addrspacecast:
; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}
define amdgpu_kernel void @use_constant_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
  %ld = load volatile i32, i32 addrspace(1)* %stof
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_group_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0

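; Expected pattern for the flat->group direction (sketch, not a literal ISA
; listing): the 64-bit flat pointer is compared against 0 (flat null) and the
; result selects between -1 (the group null value) and the low 32 bits of the
; pointer:
;   dst = (src == 0) ? -1 : lo32(src)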
; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: ds_write_b32 [[CASTPTR]], v[[K]]
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #0 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_private_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], 0, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #0 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
  store volatile i32 0, i32 addrspace(5)* %ftos
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_global_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0
; HSA: {{flat|global}}_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #0 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_constant_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
; HSA: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, 0x0
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #0 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
  load volatile i32, i32 addrspace(4)* %ftos
  ret void
}

; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16
; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SSRC_SHARED_BASE]]

; GFX9-XXX: v_mov_b32_e32 v[[HI:[0-9]+]], src_shared_base

; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(3)* null to i32*
  store volatile i32 7, i32* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_0_flat_to_group_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {
  %cast = addrspacecast i32* null to i32 addrspace(3)*
  store volatile i32 7, i32 addrspace(3)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_neg1_group_to_flat_addrspacecast:
; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32*
  store volatile i32 7, i32* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_neg1_flat_to_group_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
  %cast = addrspacecast i32* inttoptr (i64 -1 to i32*) to i32 addrspace(3)*
  store volatile i32 7, i32 addrspace(3)* %cast
  ret void
}

; FIXME: Shouldn't need to enable queue ptr
; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0

; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(5)* null to i32*
  store volatile i32 7, i32* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast:
; HSA: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0
define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
  %cast = addrspacecast i32* null to i32 addrspace(5)*
  store volatile i32 7, i32 addrspace(5)* %cast
  ret void
}

; Disable optimizations in case there are optimizations added that
; specialize away generic pointer accesses.

; HSA-LABEL: {{^}}branch_use_flat_i32:
; HSA: {{flat|global}}_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}
; HSA: s_endpgm
define amdgpu_kernel void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
entry:
  %cmp = icmp ne i32 %c, 0
  br i1 %cmp, label %local, label %global

local:
  %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32*
  br label %end

global:
  %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32*
  br label %end

end:
  %fptr = phi i32* [ %flat_local, %local ], [ %flat_global, %global ]
  store volatile i32 %x, i32* %fptr, align 4
;  %val = load i32, i32* %fptr, align 4
;  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Check for prologue initializing special SGPRs pointing to scratch.
; HSA-LABEL: {{^}}store_flat_scratch:
; CI-DAG: s_mov_b32 flat_scratch_lo, s9
; CI-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11
; CI: s_lshr_b32 flat_scratch_hi, [[ADD]], 8

; GFX9: s_add_u32 flat_scratch_lo, s6, s9
; GFX9: s_addc_u32 flat_scratch_hi, s7, 0

; HSA: {{flat|global}}_store_dword
; HSA: s_barrier
; HSA: {{flat|global}}_load_dword
define amdgpu_kernel void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
  %alloca = alloca i32, i32 9, align 4, addrspace(5)
  %x = call i32 @llvm.amdgcn.workitem.id.x() #2
  %pptr = getelementptr i32, i32 addrspace(5)* %alloca, i32 %x
  %fptr = addrspacecast i32 addrspace(5)* %pptr to i32*
  store volatile i32 %x, i32* %fptr
  ; Dummy call
  call void @llvm.amdgcn.s.barrier() #1
  %reload = load volatile i32, i32* %fptr, align 4
  store volatile i32 %reload, i32 addrspace(1)* %out, align 4
  ret void
}

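; Casting to the 32-bit constant address space (addrspace(6)) is expected to
; keep only the low half of the pointer; the checks below verify that the high
; word is rematerialized as 0 when the result is widened back to a 64-bit
; address for the scalar load (sketch): addr = zext32to64(lo32(ptr) + offset).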
; HSA-LABEL: {{^}}use_constant_to_constant32_addrspacecast
; GFX9: s_load_dwordx2 [[PTRPTR:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0{{$}}
; GFX9: s_load_dword [[OFFSET:s[0-9]+]], s[4:5], 0x8{{$}}
; GFX9: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}, [[PTRPTR]], 0x0{{$}}
; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}}
; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]]
; GFX9: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x0{{$}}
define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(i8 addrspace(4)* addrspace(4)* %ptr.ptr, i32 %offset) #0 {
  %ptr = load volatile i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %ptr.ptr
  %addrspacecast = addrspacecast i8 addrspace(4)* %ptr to i8 addrspace(6)*
  %gep = getelementptr i8, i8 addrspace(6)* %addrspacecast, i32 %offset
  %ptr.cast = bitcast i8 addrspace(6)* %gep to i32 addrspace(6)*
  %load = load volatile i32, i32 addrspace(6)* %ptr.cast, align 4
  ret void
}

; HSA-LABEL: {{^}}use_global_to_constant32_addrspacecast
; GFX9: s_load_dwordx2 [[PTRPTR:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0{{$}}
; GFX9: s_load_dword [[OFFSET:s[0-9]+]], s[4:5], 0x8{{$}}
; GFX9: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}, [[PTRPTR]], 0x0{{$}}
; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}}
; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]]
; GFX9: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x0{{$}}
define amdgpu_kernel void @use_global_to_constant32_addrspacecast(i8 addrspace(1)* addrspace(4)* %ptr.ptr, i32 %offset) #0 {
  %ptr = load volatile i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* %ptr.ptr
  %addrspacecast = addrspacecast i8 addrspace(1)* %ptr to i8 addrspace(6)*
  %gep = getelementptr i8, i8 addrspace(6)* %addrspacecast, i32 %offset
  %ptr.cast = bitcast i8 addrspace(6)* %gep to i32 addrspace(6)*
  %load = load volatile i32, i32 addrspace(6)* %ptr.cast, align 4
  ret void
}

declare void @llvm.amdgcn.s.barrier() #1
declare i32 @llvm.amdgcn.workitem.id.x() #2

attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }
attributes #2 = { nounwind readnone }