CbC/CbC_llvm: test/CodeGen/AMDGPU/extract_vector

annotate test/CodeGen/AMDGPU/extract_vector_elt-f16.ll @ 146:3fc4d5c3e21e

set tail call flag for code segment in CGCAll

author	Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date	Sun, 23 Dec 2018 19:23:36 +0900
parents	3a76565eade5
children	c2174574ed3a

rev	line source
134 3a76565eade5 update 5.0.1 mir3636 parents: 121 diff changeset	1 ; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
3a76565eade5 update 5.0.1 mir3636 parents: 121 diff changeset	2 ; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
121 803732b1fca8 LLVM 5.0 kono parents: diff changeset	3
803732b1fca8 LLVM 5.0 kono parents: diff changeset	4 ; GCN-LABEL: {{^}}extract_vector_elt_v2f16:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	5 ; GCN: s_load_dword [[VEC:s[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	6 ; GCN: s_lshr_b32 [[ELT1:s[0-9]+]], [[VEC]], 16
803732b1fca8 LLVM 5.0 kono parents: diff changeset	7 ; GCN-DAG: v_mov_b32_e32 [[VELT0:v[0-9]+]], [[VEC]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	8 ; GCN-DAG: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	9 ; GCN-DAG: buffer_store_short [[VELT0]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	10 ; GCN-DAG: buffer_store_short [[VELT1]]
134 3a76565eade5 update 5.0.1 mir3636 parents: 121 diff changeset	11 define amdgpu_kernel void @extract_vector_elt_v2f16(half addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr) #0 {
3a76565eade5 update 5.0.1 mir3636 parents: 121 diff changeset	12 %vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
121 803732b1fca8 LLVM 5.0 kono parents: diff changeset	13 %p0 = extractelement <2 x half> %vec, i32 0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	14 %p1 = extractelement <2 x half> %vec, i32 1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	15 %out1 = getelementptr half, half addrspace(1)* %out, i32 10
803732b1fca8 LLVM 5.0 kono parents: diff changeset	16 store half %p1, half addrspace(1)* %out, align 2
803732b1fca8 LLVM 5.0 kono parents: diff changeset	17 store half %p0, half addrspace(1)* %out1, align 2
803732b1fca8 LLVM 5.0 kono parents: diff changeset	18 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	19 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	20
803732b1fca8 LLVM 5.0 kono parents: diff changeset	21 ; GCN-LABEL: {{^}}extract_vector_elt_v2f16_dynamic_sgpr:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	22 ; GCN: s_load_dword [[IDX:s[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	23 ; GCN: s_load_dword [[VEC:s[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	24 ; GCN: s_lshl_b32 [[IDX_SCALED:s[0-9]+]], [[IDX]], 16
803732b1fca8 LLVM 5.0 kono parents: diff changeset	25 ; GCN: s_lshr_b32 [[ELT1:s[0-9]+]], [[VEC]], [[IDX_SCALED]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	26 ; GCN: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	27 ; GCN: buffer_store_short [[VELT1]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	28 ; GCN: ScratchSize: 0
134 3a76565eade5 update 5.0.1 mir3636 parents: 121 diff changeset	29 define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_sgpr(half addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr, i32 %idx) #0 {
3a76565eade5 update 5.0.1 mir3636 parents: 121 diff changeset	30 %vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
121 803732b1fca8 LLVM 5.0 kono parents: diff changeset	31 %elt = extractelement <2 x half> %vec, i32 %idx
803732b1fca8 LLVM 5.0 kono parents: diff changeset	32 store half %elt, half addrspace(1)* %out, align 2
803732b1fca8 LLVM 5.0 kono parents: diff changeset	33 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	34 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	35
803732b1fca8 LLVM 5.0 kono parents: diff changeset	36 ; GCN-LABEL: {{^}}extract_vector_elt_v2f16_dynamic_vgpr:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	37 ; GCN-DAG: s_load_dword [[VEC:s[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	38 ; GCN-DAG: {{flat|buffer}}_load_dword [[IDX:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	39 ; GCN: v_lshlrev_b32_e32 [[IDX_SCALED:v[0-9]+]], 16, [[IDX]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	40
803732b1fca8 LLVM 5.0 kono parents: diff changeset	41 ; SI: v_lshr_b32_e32 [[ELT:v[0-9]+]], [[VEC]], [[IDX_SCALED]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	42 ; VI: v_lshrrev_b32_e64 [[ELT:v[0-9]+]], [[IDX_SCALED]], [[VEC]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	43
803732b1fca8 LLVM 5.0 kono parents: diff changeset	44
803732b1fca8 LLVM 5.0 kono parents: diff changeset	45 ; SI: buffer_store_short [[ELT]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	46 ; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[ELT]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	47 ; GCN: ScratchSize: 0{{$}}
134 3a76565eade5 update 5.0.1 mir3636 parents: 121 diff changeset	48 define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_vgpr(half addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr, i32 addrspace(1)* %idx.ptr) #0 {
121 803732b1fca8 LLVM 5.0 kono parents: diff changeset	49 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	50 %tid.ext = sext i32 %tid to i64
803732b1fca8 LLVM 5.0 kono parents: diff changeset	51 %gep = getelementptr inbounds i32, i32 addrspace(1)* %idx.ptr, i64 %tid.ext
803732b1fca8 LLVM 5.0 kono parents: diff changeset	52 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
134 3a76565eade5 update 5.0.1 mir3636 parents: 121 diff changeset	53 %vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
121 803732b1fca8 LLVM 5.0 kono parents: diff changeset	54 %idx = load i32, i32 addrspace(1)* %gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	55 %elt = extractelement <2 x half> %vec, i32 %idx
803732b1fca8 LLVM 5.0 kono parents: diff changeset	56 store half %elt, half addrspace(1)* %out.gep, align 2
803732b1fca8 LLVM 5.0 kono parents: diff changeset	57 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	58 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	59
803732b1fca8 LLVM 5.0 kono parents: diff changeset	60 ; GCN-LABEL: {{^}}extract_vector_elt_v3f16:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	61 ; GCN: buffer_load_ushort
803732b1fca8 LLVM 5.0 kono parents: diff changeset	62 ; GCN: buffer_store_short
803732b1fca8 LLVM 5.0 kono parents: diff changeset	63 ; GCN: buffer_store_short
803732b1fca8 LLVM 5.0 kono parents: diff changeset	64 define amdgpu_kernel void @extract_vector_elt_v3f16(half addrspace(1)* %out, <3 x half> %foo) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	65 %p0 = extractelement <3 x half> %foo, i32 0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	66 %p1 = extractelement <3 x half> %foo, i32 2
803732b1fca8 LLVM 5.0 kono parents: diff changeset	67 %out1 = getelementptr half, half addrspace(1)* %out, i32 1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	68 store half %p1, half addrspace(1)* %out, align 2
803732b1fca8 LLVM 5.0 kono parents: diff changeset	69 store half %p0, half addrspace(1)* %out1, align 2
803732b1fca8 LLVM 5.0 kono parents: diff changeset	70 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	71 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	72
803732b1fca8 LLVM 5.0 kono parents: diff changeset	73 ; GCN-LABEL: {{^}}extract_vector_elt_v4f16:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	74 ; GCN: buffer_load_ushort
803732b1fca8 LLVM 5.0 kono parents: diff changeset	75 ; GCN: buffer_load_ushort
803732b1fca8 LLVM 5.0 kono parents: diff changeset	76 ; GCN: buffer_store_short
803732b1fca8 LLVM 5.0 kono parents: diff changeset	77 ; GCN: buffer_store_short
803732b1fca8 LLVM 5.0 kono parents: diff changeset	78 define amdgpu_kernel void @extract_vector_elt_v4f16(half addrspace(1)* %out, <4 x half> %foo) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	79 %p0 = extractelement <4 x half> %foo, i32 0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	80 %p1 = extractelement <4 x half> %foo, i32 2
803732b1fca8 LLVM 5.0 kono parents: diff changeset	81 %out1 = getelementptr half, half addrspace(1)* %out, i32 10
803732b1fca8 LLVM 5.0 kono parents: diff changeset	82 store half %p1, half addrspace(1)* %out, align 2
803732b1fca8 LLVM 5.0 kono parents: diff changeset	83 store half %p0, half addrspace(1)* %out1, align 2
803732b1fca8 LLVM 5.0 kono parents: diff changeset	84 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	85 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	86
803732b1fca8 LLVM 5.0 kono parents: diff changeset	87 ; GCN-LABEL: {{^}}dynamic_extract_vector_elt_v3f16:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	88 ; GCN: buffer_load_ushort
803732b1fca8 LLVM 5.0 kono parents: diff changeset	89 ; GCN: buffer_load_ushort
803732b1fca8 LLVM 5.0 kono parents: diff changeset	90 ; GCN: buffer_load_ushort
803732b1fca8 LLVM 5.0 kono parents: diff changeset	91
803732b1fca8 LLVM 5.0 kono parents: diff changeset	92 ; GCN: buffer_store_short
803732b1fca8 LLVM 5.0 kono parents: diff changeset	93 ; GCN: buffer_store_short
803732b1fca8 LLVM 5.0 kono parents: diff changeset	94 ; GCN: buffer_store_short
803732b1fca8 LLVM 5.0 kono parents: diff changeset	95
803732b1fca8 LLVM 5.0 kono parents: diff changeset	96 ; GCN: buffer_load_ushort
803732b1fca8 LLVM 5.0 kono parents: diff changeset	97 ; GCN: buffer_store_short
803732b1fca8 LLVM 5.0 kono parents: diff changeset	98 define amdgpu_kernel void @dynamic_extract_vector_elt_v3f16(half addrspace(1)* %out, <3 x half> %foo, i32 %idx) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	99 %p0 = extractelement <3 x half> %foo, i32 %idx
803732b1fca8 LLVM 5.0 kono parents: diff changeset	100 %out1 = getelementptr half, half addrspace(1)* %out, i32 1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	101 store half %p0, half addrspace(1)* %out
803732b1fca8 LLVM 5.0 kono parents: diff changeset	102 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	103 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	104
803732b1fca8 LLVM 5.0 kono parents: diff changeset	105 ; GCN-LABEL: {{^}}dynamic_extract_vector_elt_v4f16:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	106 ; GCN: buffer_load_ushort
803732b1fca8 LLVM 5.0 kono parents: diff changeset	107 ; GCN: buffer_load_ushort
803732b1fca8 LLVM 5.0 kono parents: diff changeset	108 ; GCN: buffer_load_ushort
803732b1fca8 LLVM 5.0 kono parents: diff changeset	109 ; GCN: buffer_load_ushort
803732b1fca8 LLVM 5.0 kono parents: diff changeset	110
803732b1fca8 LLVM 5.0 kono parents: diff changeset	111 ; GCN: buffer_store_short
803732b1fca8 LLVM 5.0 kono parents: diff changeset	112 ; GCN: buffer_store_short
803732b1fca8 LLVM 5.0 kono parents: diff changeset	113 ; GCN: buffer_store_short
803732b1fca8 LLVM 5.0 kono parents: diff changeset	114 ; GCN: buffer_store_short
803732b1fca8 LLVM 5.0 kono parents: diff changeset	115
803732b1fca8 LLVM 5.0 kono parents: diff changeset	116 ; GCN: buffer_load_ushort
803732b1fca8 LLVM 5.0 kono parents: diff changeset	117 ; GCN: buffer_store_short
803732b1fca8 LLVM 5.0 kono parents: diff changeset	118 define amdgpu_kernel void @dynamic_extract_vector_elt_v4f16(half addrspace(1)* %out, <4 x half> %foo, i32 %idx) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	119 %p0 = extractelement <4 x half> %foo, i32 %idx
803732b1fca8 LLVM 5.0 kono parents: diff changeset	120 %out1 = getelementptr half, half addrspace(1)* %out, i32 1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	121 store half %p0, half addrspace(1)* %out
803732b1fca8 LLVM 5.0 kono parents: diff changeset	122 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	123 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	124
803732b1fca8 LLVM 5.0 kono parents: diff changeset	125 declare i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	126
803732b1fca8 LLVM 5.0 kono parents: diff changeset	127 attributes #0 = { nounwind }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	128 attributes #1 = { nounwind readnone }

Mercurial > hg > CbC > CbC_llvm

annotate test/CodeGen/AMDGPU/extract_vector_elt-f16.ll @ 146:3fc4d5c3e21e