annotate llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll @ 206:f17a3b42b08b

Added tag before-12 for changeset b7591485f4cd
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 07 Jun 2021 21:25:57 +0900
parents 1d019706d866
children c4bab56944e8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
150
anatofuz
parents:
diff changeset
1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_10,GFX9 %s
anatofuz
parents:
diff changeset
2 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_10,GFX10 %s
anatofuz
parents:
diff changeset
3
anatofuz
parents:
diff changeset
; Kernel under test: loads the <2 x half> at %arg[workitem.id.x], applies
; llvm.maxnum.v2f16 against the constant vector <0xH0000, 0xH3C00> (0.0, 1.0)
; and stores the result back to the same address.
; The GFX9_10 pattern expects the constant to appear as the operand 1.0 with
; op_sel:[0,1] op_sel_hi:[1,0], with nothing else after it on the line.
4 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_1:
anatofuz
parents:
diff changeset
5 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
anatofuz
parents:
diff changeset
6 define amdgpu_kernel void @test_pk_max_f16_literal_0_1(<2 x half> addrspace(1)* nocapture %arg) {
anatofuz
parents:
diff changeset
7 bb:
anatofuz
parents:
diff changeset
8 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
9 %tmp1 = zext i32 %tmp to i64
anatofuz
parents:
diff changeset
10 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
anatofuz
parents:
diff changeset
11 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
12 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xH3C00>)
anatofuz
parents:
diff changeset
13 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
14 ret void
anatofuz
parents:
diff changeset
15 }
anatofuz
parents:
diff changeset
16
anatofuz
parents:
diff changeset
; Kernel under test: loads the <2 x half> at %arg[workitem.id.x], applies
; llvm.maxnum.v2f16 against the constant vector <0xH3C00, 0xH0000> (1.0, 0.0)
; and stores the result back to the same address.
; The GFX9_10 pattern expects a bare 1.0 operand; the trailing {{$}} rejects
; any op_sel / op_sel_hi modifier after it.
17 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_1_0:
anatofuz
parents:
diff changeset
18 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0{{$}}
anatofuz
parents:
diff changeset
19 define amdgpu_kernel void @test_pk_max_f16_literal_1_0(<2 x half> addrspace(1)* nocapture %arg) {
anatofuz
parents:
diff changeset
20 bb:
anatofuz
parents:
diff changeset
21 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
22 %tmp1 = zext i32 %tmp to i64
anatofuz
parents:
diff changeset
23 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
anatofuz
parents:
diff changeset
24 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
25 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH3C00, half 0xH0000>)
anatofuz
parents:
diff changeset
26 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
27 ret void
anatofuz
parents:
diff changeset
28 }
anatofuz
parents:
diff changeset
29
anatofuz
parents:
diff changeset
; Kernel under test: loads the <2 x half> at %arg[workitem.id.x], applies
; llvm.maxnum.v2f16 against the splat constant <0xH3C00, 0xH3C00> (1.0, 1.0)
; and stores the result back to the same address.
; The GFX9_10 pattern expects the operand 1.0 followed only by
; op_sel_hi:[1,0] (no op_sel modifier).
30 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_1_1:
anatofuz
parents:
diff changeset
31 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel_hi:[1,0]{{$}}
anatofuz
parents:
diff changeset
32 define amdgpu_kernel void @test_pk_max_f16_literal_1_1(<2 x half> addrspace(1)* nocapture %arg) {
anatofuz
parents:
diff changeset
33 bb:
anatofuz
parents:
diff changeset
34 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
35 %tmp1 = zext i32 %tmp to i64
anatofuz
parents:
diff changeset
36 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
anatofuz
parents:
diff changeset
37 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
38 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH3C00, half 0xH3C00>)
anatofuz
parents:
diff changeset
39 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
40 ret void
anatofuz
parents:
diff changeset
41 }
anatofuz
parents:
diff changeset
42
anatofuz
parents:
diff changeset
; Kernel under test: loads the <2 x half> at %arg[workitem.id.x], applies
; llvm.maxnum.v2f16 against the constant vector <0xH0000, 0xHBC00> (0.0, -1.0)
; and stores the result back to the same address.
; The GFX9_10 pattern expects the constant to appear as the operand -1.0 with
; op_sel:[0,1] op_sel_hi:[1,0], with nothing else after it on the line.
43 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_m1:
anatofuz
parents:
diff changeset
44 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
anatofuz
parents:
diff changeset
45 define amdgpu_kernel void @test_pk_max_f16_literal_0_m1(<2 x half> addrspace(1)* nocapture %arg) {
anatofuz
parents:
diff changeset
46 bb:
anatofuz
parents:
diff changeset
47 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
48 %tmp1 = zext i32 %tmp to i64
anatofuz
parents:
diff changeset
49 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
anatofuz
parents:
diff changeset
50 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
51 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xHBC00>)
anatofuz
parents:
diff changeset
52 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
53 ret void
anatofuz
parents:
diff changeset
54 }
anatofuz
parents:
diff changeset
55
anatofuz
parents:
diff changeset
; Kernel under test: loads the <2 x half> at %arg[workitem.id.x], applies
; llvm.maxnum.v2f16 against the constant vector <0xHBC00, 0xH0000> (-1.0, 0.0)
; and stores the result back to the same address.
; The GFX9_10 pattern expects a bare -1.0 operand; the trailing {{$}} rejects
; any op_sel / op_sel_hi modifier after it.
56 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_m1_0:
anatofuz
parents:
diff changeset
57 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0{{$}}
anatofuz
parents:
diff changeset
58 define amdgpu_kernel void @test_pk_max_f16_literal_m1_0(<2 x half> addrspace(1)* nocapture %arg) {
anatofuz
parents:
diff changeset
59 bb:
anatofuz
parents:
diff changeset
60 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
61 %tmp1 = zext i32 %tmp to i64
anatofuz
parents:
diff changeset
62 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
anatofuz
parents:
diff changeset
63 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
64 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xHBC00, half 0xH0000>)
anatofuz
parents:
diff changeset
65 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
66 ret void
anatofuz
parents:
diff changeset
67 }
anatofuz
parents:
diff changeset
68
anatofuz
parents:
diff changeset
; Kernel under test: loads the <2 x half> at %arg[workitem.id.x], applies
; llvm.maxnum.v2f16 against the splat constant <0xHBC00, 0xHBC00> (-1.0, -1.0)
; and stores the result back to the same address.
; The GFX9_10 pattern expects the operand -1.0 followed only by
; op_sel_hi:[1,0] (no op_sel modifier).
69 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_m1_m1:
anatofuz
parents:
diff changeset
70 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel_hi:[1,0]{{$}}
anatofuz
parents:
diff changeset
71 define amdgpu_kernel void @test_pk_max_f16_literal_m1_m1(<2 x half> addrspace(1)* nocapture %arg) {
anatofuz
parents:
diff changeset
72 bb:
anatofuz
parents:
diff changeset
73 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
74 %tmp1 = zext i32 %tmp to i64
anatofuz
parents:
diff changeset
75 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
anatofuz
parents:
diff changeset
76 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
77 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xHBC00, half 0xHBC00>)
anatofuz
parents:
diff changeset
78 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
79 ret void
anatofuz
parents:
diff changeset
80 }
anatofuz
parents:
diff changeset
81
anatofuz
parents:
diff changeset
; Kernel under test: loads the <2 x half> at %arg[workitem.id.x], applies
; llvm.maxnum.v2f16 against the all-zero constant <0xH0000, 0xH0000>
; and stores the result back to the same address.
; The GFX9_10 pattern expects the constant to appear as the plain operand 0
; with no modifiers after it.
82 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_0:
anatofuz
parents:
diff changeset
83 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}}
anatofuz
parents:
diff changeset
84 define amdgpu_kernel void @test_pk_max_f16_literal_0_0(<2 x half> addrspace(1)* nocapture %arg) {
anatofuz
parents:
diff changeset
85 bb:
anatofuz
parents:
diff changeset
86 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
87 %tmp1 = zext i32 %tmp to i64
anatofuz
parents:
diff changeset
88 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
anatofuz
parents:
diff changeset
89 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
90 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xH0000>)
anatofuz
parents:
diff changeset
91 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
92 ret void
anatofuz
parents:
diff changeset
93 }
anatofuz
parents:
diff changeset
94
anatofuz
parents:
diff changeset
; Kernel under test: loads the <2 x half> at %arg[workitem.id.x], applies
; llvm.maxnum.v2f16 against the constant vector <0xH0000, 0xH41C8>
; and stores the result back to the same address.
; Expected code differs per target:
;  - GFX9 first moves the packed value 0x41c80000 into an SGPR (captured as C)
;    with s_mov_b32 and passes that register as the last operand.
;  - GFX10 passes 0x41c8 directly as the first source operand, with
;    op_sel:[1,0] op_sel_hi:[0,1].
95 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_41c8:
anatofuz
parents:
diff changeset
96 ; GFX9: s_mov_b32 [[C:s[0-9]+]], 0x41c80000
anatofuz
parents:
diff changeset
97 ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
anatofuz
parents:
diff changeset
98 ; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c8, v{{[0-9]+}} op_sel:[1,0] op_sel_hi:[0,1]{{$}}
anatofuz
parents:
diff changeset
99 define amdgpu_kernel void @test_pk_max_f16_literal_0_41c8(<2 x half> addrspace(1)* nocapture %arg) {
anatofuz
parents:
diff changeset
100 bb:
anatofuz
parents:
diff changeset
101 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
102 %tmp1 = zext i32 %tmp to i64
anatofuz
parents:
diff changeset
103 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
anatofuz
parents:
diff changeset
104 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
105 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xH41C8>)
anatofuz
parents:
diff changeset
106 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
107 ret void
anatofuz
parents:
diff changeset
108 }
anatofuz
parents:
diff changeset
109
anatofuz
parents:
diff changeset
; Kernel under test: loads the <2 x half> at %arg[workitem.id.x], applies
; llvm.maxnum.v2f16 against the constant vector <0xH41C8, 0xH0000>
; and stores the result back to the same address.
; Expected code differs per target:
;  - GFX9 first moves 0x41c8 into an SGPR (captured as C) with s_movk_i32
;    and passes that register as the last operand.
;  - GFX10 passes 0x41c8 directly as the first source operand, with no
;    op_sel / op_sel_hi modifier after the instruction.
; The zero element is spelled with four hex digits (0xH0000), matching the
; other kernels in this file; the value is the same as the shorter 0xH0.
110 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_41c8_0:
anatofuz
parents:
diff changeset
111 ; GFX9: s_movk_i32 [[C:s[0-9]+]], 0x41c8
anatofuz
parents:
diff changeset
112 ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
anatofuz
parents:
diff changeset
113 ; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c8, v{{[0-9]+}}{{$}}
anatofuz
parents:
diff changeset
114 define amdgpu_kernel void @test_pk_max_f16_literal_41c8_0(<2 x half> addrspace(1)* nocapture %arg) {
anatofuz
parents:
diff changeset
115 bb:
anatofuz
parents:
diff changeset
116 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
117 %tmp1 = zext i32 %tmp to i64
anatofuz
parents:
diff changeset
118 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
anatofuz
parents:
diff changeset
119 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
120 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH41C8, half 0xH0000>)
anatofuz
parents:
diff changeset
121 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
122 ret void
anatofuz
parents:
diff changeset
123 }
anatofuz
parents:
diff changeset
124
anatofuz
parents:
diff changeset
; Kernel under test: loads the <2 x half> at %arg[workitem.id.x], applies
; llvm.maxnum.v2f16 against the constant vector <0xH42CA, 0xH41C8>
; and stores the result back to the same address.
; Expected code differs per target:
;  - GFX9 first moves the packed value 0x41c842ca into an SGPR (captured as C)
;    with s_mov_b32 and passes that register as the last operand.
;  - GFX10 passes 0x41c842ca directly as the first source operand, with no
;    op_sel / op_sel_hi modifier after the instruction.
125 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_42ca_41c8:
anatofuz
parents:
diff changeset
126 ; GFX9: s_mov_b32 [[C:s[0-9]+]], 0x41c842ca
anatofuz
parents:
diff changeset
127 ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
anatofuz
parents:
diff changeset
128 ; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c842ca, v{{[0-9]+}}{{$}}
anatofuz
parents:
diff changeset
129 define amdgpu_kernel void @test_pk_max_f16_literal_42ca_41c8(<2 x half> addrspace(1)* nocapture %arg) {
anatofuz
parents:
diff changeset
130 bb:
anatofuz
parents:
diff changeset
131 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
132 %tmp1 = zext i32 %tmp to i64
anatofuz
parents:
diff changeset
133 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
anatofuz
parents:
diff changeset
134 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
135 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH42CA, half 0xH41C8>)
anatofuz
parents:
diff changeset
136 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
anatofuz
parents:
diff changeset
137 ret void
anatofuz
parents:
diff changeset
138 }
anatofuz
parents:
diff changeset
139
anatofuz
parents:
diff changeset
; Intrinsics called by the kernels above: the <2 x half> maxnum operation being
; tested, and the work-item X id that each kernel zero-extends to index %arg.
140 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
anatofuz
parents:
diff changeset
141 declare i32 @llvm.amdgcn.workitem.id.x()