CbC/CbC_llvm: llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll

annotate llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll @ 206:f17a3b42b08b

Added tag before-12 for changeset b7591485f4cd

author	Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date	Mon, 07 Jun 2021 21:25:57 +0900
parents	1d019706d866
children	c4bab56944e8

rev	line source
150 1d019706d866 LLVM10 anatofuz parents: diff changeset	1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_10,GFX9 %s
1d019706d866 LLVM10 anatofuz parents: diff changeset	2 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_10,GFX10 %s
1d019706d866 LLVM10 anatofuz parents: diff changeset	3
1d019706d866 LLVM10 anatofuz parents: diff changeset	4 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_1:
1d019706d866 LLVM10 anatofuz parents: diff changeset	5 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	6 define amdgpu_kernel void @test_pk_max_f16_literal_0_1(<2 x half> addrspace(1)* nocapture %arg) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	7 bb:
1d019706d866 LLVM10 anatofuz parents: diff changeset	8 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
1d019706d866 LLVM10 anatofuz parents: diff changeset	9 %tmp1 = zext i32 %tmp to i64
1d019706d866 LLVM10 anatofuz parents: diff changeset	10 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
1d019706d866 LLVM10 anatofuz parents: diff changeset	11 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	12 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xH3C00>)
1d019706d866 LLVM10 anatofuz parents: diff changeset	13 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	14 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	15 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	16
1d019706d866 LLVM10 anatofuz parents: diff changeset	17 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_1_0:
1d019706d866 LLVM10 anatofuz parents: diff changeset	18 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0{{$}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	19 define amdgpu_kernel void @test_pk_max_f16_literal_1_0(<2 x half> addrspace(1)* nocapture %arg) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	20 bb:
1d019706d866 LLVM10 anatofuz parents: diff changeset	21 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
1d019706d866 LLVM10 anatofuz parents: diff changeset	22 %tmp1 = zext i32 %tmp to i64
1d019706d866 LLVM10 anatofuz parents: diff changeset	23 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
1d019706d866 LLVM10 anatofuz parents: diff changeset	24 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	25 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH3C00, half 0xH0000>)
1d019706d866 LLVM10 anatofuz parents: diff changeset	26 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	27 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	28 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	29
1d019706d866 LLVM10 anatofuz parents: diff changeset	30 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_1_1:
1d019706d866 LLVM10 anatofuz parents: diff changeset	31 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel_hi:[1,0]{{$}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	32 define amdgpu_kernel void @test_pk_max_f16_literal_1_1(<2 x half> addrspace(1)* nocapture %arg) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	33 bb:
1d019706d866 LLVM10 anatofuz parents: diff changeset	34 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
1d019706d866 LLVM10 anatofuz parents: diff changeset	35 %tmp1 = zext i32 %tmp to i64
1d019706d866 LLVM10 anatofuz parents: diff changeset	36 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
1d019706d866 LLVM10 anatofuz parents: diff changeset	37 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	38 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH3C00, half 0xH3C00>)
1d019706d866 LLVM10 anatofuz parents: diff changeset	39 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	40 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	41 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	42
1d019706d866 LLVM10 anatofuz parents: diff changeset	43 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_m1:
1d019706d866 LLVM10 anatofuz parents: diff changeset	44 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	45 define amdgpu_kernel void @test_pk_max_f16_literal_0_m1(<2 x half> addrspace(1)* nocapture %arg) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	46 bb:
1d019706d866 LLVM10 anatofuz parents: diff changeset	47 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
1d019706d866 LLVM10 anatofuz parents: diff changeset	48 %tmp1 = zext i32 %tmp to i64
1d019706d866 LLVM10 anatofuz parents: diff changeset	49 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
1d019706d866 LLVM10 anatofuz parents: diff changeset	50 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	51 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xHBC00>)
1d019706d866 LLVM10 anatofuz parents: diff changeset	52 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	53 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	54 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	55
1d019706d866 LLVM10 anatofuz parents: diff changeset	56 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_m1_0:
1d019706d866 LLVM10 anatofuz parents: diff changeset	57 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0{{$}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	58 define amdgpu_kernel void @test_pk_max_f16_literal_m1_0(<2 x half> addrspace(1)* nocapture %arg) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	59 bb:
1d019706d866 LLVM10 anatofuz parents: diff changeset	60 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
1d019706d866 LLVM10 anatofuz parents: diff changeset	61 %tmp1 = zext i32 %tmp to i64
1d019706d866 LLVM10 anatofuz parents: diff changeset	62 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
1d019706d866 LLVM10 anatofuz parents: diff changeset	63 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	64 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xHBC00, half 0xH0000>)
1d019706d866 LLVM10 anatofuz parents: diff changeset	65 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	66 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	67 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	68
1d019706d866 LLVM10 anatofuz parents: diff changeset	69 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_m1_m1:
1d019706d866 LLVM10 anatofuz parents: diff changeset	70 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel_hi:[1,0]{{$}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	71 define amdgpu_kernel void @test_pk_max_f16_literal_m1_m1(<2 x half> addrspace(1)* nocapture %arg) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	72 bb:
1d019706d866 LLVM10 anatofuz parents: diff changeset	73 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
1d019706d866 LLVM10 anatofuz parents: diff changeset	74 %tmp1 = zext i32 %tmp to i64
1d019706d866 LLVM10 anatofuz parents: diff changeset	75 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
1d019706d866 LLVM10 anatofuz parents: diff changeset	76 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	77 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xHBC00, half 0xHBC00>)
1d019706d866 LLVM10 anatofuz parents: diff changeset	78 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	79 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	80 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	81
1d019706d866 LLVM10 anatofuz parents: diff changeset	82 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_0:
1d019706d866 LLVM10 anatofuz parents: diff changeset	83 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	84 define amdgpu_kernel void @test_pk_max_f16_literal_0_0(<2 x half> addrspace(1)* nocapture %arg) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	85 bb:
1d019706d866 LLVM10 anatofuz parents: diff changeset	86 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
1d019706d866 LLVM10 anatofuz parents: diff changeset	87 %tmp1 = zext i32 %tmp to i64
1d019706d866 LLVM10 anatofuz parents: diff changeset	88 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
1d019706d866 LLVM10 anatofuz parents: diff changeset	89 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	90 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xH0000>)
1d019706d866 LLVM10 anatofuz parents: diff changeset	91 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	92 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	93 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	94
1d019706d866 LLVM10 anatofuz parents: diff changeset	95 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_41c8:
1d019706d866 LLVM10 anatofuz parents: diff changeset	96 ; GFX9: s_mov_b32 [[C:s[0-9]+]], 0x41c80000
1d019706d866 LLVM10 anatofuz parents: diff changeset	97 ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	98 ; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c8, v{{[0-9]+}} op_sel:[1,0] op_sel_hi:[0,1]{{$}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	99 define amdgpu_kernel void @test_pk_max_f16_literal_0_41c8(<2 x half> addrspace(1)* nocapture %arg) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	100 bb:
1d019706d866 LLVM10 anatofuz parents: diff changeset	101 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
1d019706d866 LLVM10 anatofuz parents: diff changeset	102 %tmp1 = zext i32 %tmp to i64
1d019706d866 LLVM10 anatofuz parents: diff changeset	103 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
1d019706d866 LLVM10 anatofuz parents: diff changeset	104 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	105 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xH41C8>)
1d019706d866 LLVM10 anatofuz parents: diff changeset	106 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	107 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	108 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	109
1d019706d866 LLVM10 anatofuz parents: diff changeset	110 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_41c8_0:
1d019706d866 LLVM10 anatofuz parents: diff changeset	111 ; GFX9: s_movk_i32 [[C:s[0-9]+]], 0x41c8
1d019706d866 LLVM10 anatofuz parents: diff changeset	112 ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	113 ; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c8, v{{[0-9]+}}{{$}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	114 define amdgpu_kernel void @test_pk_max_f16_literal_41c8_0(<2 x half> addrspace(1)* nocapture %arg) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	115 bb:
1d019706d866 LLVM10 anatofuz parents: diff changeset	116 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
1d019706d866 LLVM10 anatofuz parents: diff changeset	117 %tmp1 = zext i32 %tmp to i64
1d019706d866 LLVM10 anatofuz parents: diff changeset	118 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
1d019706d866 LLVM10 anatofuz parents: diff changeset	119 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	120 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH41C8, half 0xH0>)
1d019706d866 LLVM10 anatofuz parents: diff changeset	121 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	122 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	123 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	124
1d019706d866 LLVM10 anatofuz parents: diff changeset	125 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_42ca_41c8:
1d019706d866 LLVM10 anatofuz parents: diff changeset	126 ; GFX9: s_mov_b32 [[C:s[0-9]+]], 0x41c842ca
1d019706d866 LLVM10 anatofuz parents: diff changeset	127 ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	128 ; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c842ca, v{{[0-9]+}}{{$}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	129 define amdgpu_kernel void @test_pk_max_f16_literal_42ca_41c8(<2 x half> addrspace(1)* nocapture %arg) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	130 bb:
1d019706d866 LLVM10 anatofuz parents: diff changeset	131 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
1d019706d866 LLVM10 anatofuz parents: diff changeset	132 %tmp1 = zext i32 %tmp to i64
1d019706d866 LLVM10 anatofuz parents: diff changeset	133 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
1d019706d866 LLVM10 anatofuz parents: diff changeset	134 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	135 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH42CA, half 0xH41C8>)
1d019706d866 LLVM10 anatofuz parents: diff changeset	136 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	137 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	138 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	139
1d019706d866 LLVM10 anatofuz parents: diff changeset	140 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
1d019706d866 LLVM10 anatofuz parents: diff changeset	141 declare i32 @llvm.amdgcn.workitem.id.x()

Mercurial > hg > CbC > CbC_llvm

annotate llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll @ 206:f17a3b42b08b