; Codegen test for the llvm.bitreverse.* intrinsics on the AMDGPU backend.
; Three llc configurations are exercised: the default amdgcn target and
; -mcpu=tonga are both verified with the SI check prefix, while -mcpu=fiji is
; verified with the VI check prefix. All three share the FUNC prefix, which is
; used for the per-function labels.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s

; Work-item id intrinsic; several v_brev_* kernels use it to index their
; input pointer.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; Scalar bitreverse overloads.
declare i16 @llvm.bitreverse.i16(i16) #1
declare i32 @llvm.bitreverse.i32(i32) #1
declare i64 @llvm.bitreverse.i64(i64) #1

; Vector-of-i32 bitreverse overloads.
declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>) #1
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) #1

; Vector-of-i64 bitreverse overloads.
declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) #1
declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>) #1

; Bit-reverses an i16 passed directly as a kernel argument and stores the
; result to %out. The only instruction pattern required is an s_brev_b32.
; FUNC-LABEL: {{^}}s_brev_i16:
; SI: s_brev_b32
define amdgpu_kernel void @s_brev_i16(i16 addrspace(1)* noalias %out, i16 %val) #0 {
  %brev = call i16 @llvm.bitreverse.i16(i16 %val) #1
  store i16 %brev, i16 addrspace(1)* %out
  ret void
}

; Loads an i16 from %valptr (no work-item indexing), bit-reverses it and
; stores the result to %out. The only instruction pattern required is a
; v_bfrev_b32_e32.
; FUNC-LABEL: {{^}}v_brev_i16:
; SI: v_bfrev_b32_e32
define amdgpu_kernel void @v_brev_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) #0 {
  %val = load i16, i16 addrspace(1)* %valptr
  %brev = call i16 @llvm.bitreverse.i16(i16 %val) #1
  store i16 %brev, i16 addrspace(1)* %out
  ret void
}

; Bit-reverses an i32 kernel argument and stores the result to %out.
; The checks follow the value end to end: it is loaded into an SGPR with
; s_load_dword, reversed with s_brev_b32, copied into a VGPR with
; v_mov_b32_e32, and that VGPR is written by buffer_store_dword before
; s_endpgm.
; FUNC-LABEL: {{^}}s_brev_i32:
; SI: s_load_dword [[VAL:s[0-9]+]],
; SI: s_brev_b32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
; SI: s_endpgm
define amdgpu_kernel void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) #0 {
  %brev = call i32 @llvm.bitreverse.i32(i32 %val) #1
  store i32 %brev, i32 addrspace(1)* %out
  ret void
}

; Loads the i32 at %valptr[workitem.id.x], bit-reverses it and stores the
; result to %out. The load check accepts either a buffer_ or a flat_ load;
; the loaded VGPR must feed v_bfrev_b32_e32 and the reversed VGPR must be
; the one written by buffer_store_dword before s_endpgm.
; FUNC-LABEL: {{^}}v_brev_i32:
; SI: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
; SI: v_bfrev_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define amdgpu_kernel void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep
  %brev = call i32 @llvm.bitreverse.i32(i32 %val) #1
  store i32 %brev, i32 addrspace(1)* %out
  ret void
}

; Bit-reverses a <2 x i32> kernel argument and stores the result to %out.
; Two s_brev_b32 instructions are required, one per vector element.
; FUNC-LABEL: {{^}}s_brev_v2i32:
; SI: s_brev_b32
; SI: s_brev_b32
define amdgpu_kernel void @s_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> %val) #0 {
  %brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1
  store <2 x i32> %brev, <2 x i32> addrspace(1)* %out
  ret void
}

; Loads the <2 x i32> at %valptr[workitem.id.x], bit-reverses it and stores
; the result to %out. Two v_bfrev_b32_e32 instructions are required, one per
; vector element.
; FUNC-LABEL: {{^}}v_brev_v2i32:
; SI: v_bfrev_b32_e32
; SI: v_bfrev_b32_e32
define amdgpu_kernel void @v_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
  %val = load <2 x i32>, <2 x i32> addrspace(1)* %gep
  %brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1
  store <2 x i32> %brev, <2 x i32> addrspace(1)* %out
  ret void
}

; Bit-reverses an i64 kernel argument and stores the result to %out.
; Only the function label is checked; no instruction patterns are asserted,
; so this case verifies only that llc accepts the input (with
; -verify-machineinstrs) and emits the function.
; FUNC-LABEL: {{^}}s_brev_i64:
define amdgpu_kernel void @s_brev_i64(i64 addrspace(1)* noalias %out, i64 %val) #0 {
  %brev = call i64 @llvm.bitreverse.i64(i64 %val) #1
  store i64 %brev, i64 addrspace(1)* %out
  ret void
}

; Loads the i64 at %valptr[workitem.id.x], bit-reverses it and stores the
; result to %out. The single check is negative: the output must not contain
; a `v_or_b32_e64 vN, 0, 0`, i.e. an OR of two zero constants.
; FUNC-LABEL: {{^}}v_brev_i64:
; SI-NOT: v_or_b32_e64 v{{[0-9]+}}, 0, 0
define amdgpu_kernel void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %valptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i64, i64 addrspace(1)* %valptr, i32 %tid
  %val = load i64, i64 addrspace(1)* %gep
  %brev = call i64 @llvm.bitreverse.i64(i64 %val) #1
  store i64 %brev, i64 addrspace(1)* %out
  ret void
}

; Bit-reverses a <2 x i64> kernel argument and stores the result to %out.
; Only the function label is checked; no instruction patterns are asserted.
; FUNC-LABEL: {{^}}s_brev_v2i64:
define amdgpu_kernel void @s_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %val) #0 {
  %brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1
  store <2 x i64> %brev, <2 x i64> addrspace(1)* %out
  ret void
}

; Loads the <2 x i64> at %valptr[workitem.id.x], bit-reverses it and stores
; the result to %out. Only the function label is checked; no instruction
; patterns are asserted.
; FUNC-LABEL: {{^}}v_brev_v2i64:
define amdgpu_kernel void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %valptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %valptr, i32 %tid
  %val = load <2 x i64>, <2 x i64> addrspace(1)* %gep
  %brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1
  store <2 x i64> %brev, <2 x i64> addrspace(1)* %out
  ret void
}

; Non-kernel function (returns float, no amdgpu_kernel calling convention).
; Truncates the i32 argument to i16, bit-reverses the i16, reinterprets the
; bits as half and extends to float. Checked only in the fiji configuration:
; the reverse must be a v_bfrev_b32_sdwa reading the low word of v0
; (src0_sel:WORD_0) and writing the full dword of v0.
; FUNC-LABEL: {{^}}missing_truncate_promote_bitreverse:
; VI: v_bfrev_b32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
define float @missing_truncate_promote_bitreverse(i32 %arg) {
bb:
  %tmp = trunc i32 %arg to i16
  %tmp1 = call i16 @llvm.bitreverse.i16(i16 %tmp)
  %tmp2 = bitcast i16 %tmp1 to half
  %tmp3 = fpext half %tmp2 to float
  ret float %tmp3
}

; Attribute groups referenced throughout the file:
;   #0 is attached to the amdgpu_kernel definitions.
;   #1 is attached to the intrinsic declarations and to the bitreverse call
;      sites inside the kernels.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }