95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
|
121
|
2 ; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
3
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
4 ; FIXME: Enable VI
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
5
|
120
|
6 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
7 declare float @llvm.fabs.f32(float) nounwind readnone
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
8
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
9 ; GCN-LABEL: {{^}}madak_f32:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
10 ; GCN: buffer_load_dword [[VA:v[0-9]+]]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
11 ; GCN: buffer_load_dword [[VB:v[0-9]+]]
|
121
|
12 ; GCN: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
|
|
13 define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
|
120
|
14 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
15 %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
16 %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
17 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
18
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
19 %a = load float, float addrspace(1)* %in.a.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
20 %b = load float, float addrspace(1)* %in.b.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
21
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
22 %mul = fmul float %a, %b
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
23 %madak = fadd float %mul, 10.0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
24 store float %madak, float addrspace(1)* %out.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
25 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
26 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
27
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
28 ; Make sure this is only folded with one use. This is a code size
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
29 ; optimization and if we fold the immediate multiple times, we'll undo
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
30 ; it.
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
31
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
32 ; GCN-LABEL: {{^}}madak_2_use_f32:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
33 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
34 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
35 ; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
36 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
|
121
|
37 ; GCN-DAG: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
|
|
38 ; GCN-DAG: v_mac_f32_e32 [[VK]], [[VA]], [[VC]]
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
39 ; GCN: s_endpgm
|
121
|
40 define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
|
120
|
41 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
42
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
43 %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
44 %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
45 %in.gep.2 = getelementptr float, float addrspace(1)* %in.gep.0, i32 2
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
46
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
47 %out.gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
48 %out.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
49
|
120
|
50 %a = load volatile float, float addrspace(1)* %in.gep.0, align 4
|
|
51 %b = load volatile float, float addrspace(1)* %in.gep.1, align 4
|
|
52 %c = load volatile float, float addrspace(1)* %in.gep.2, align 4
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
53
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
54 %mul0 = fmul float %a, %b
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
55 %mul1 = fmul float %a, %c
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
56 %madak0 = fadd float %mul0, 10.0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
57 %madak1 = fadd float %mul1, 10.0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
58
|
120
|
59 store volatile float %madak0, float addrspace(1)* %out.gep.0, align 4
|
|
60 store volatile float %madak1, float addrspace(1)* %out.gep.1, align 4
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
61 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
62 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
63
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
64 ; GCN-LABEL: {{^}}madak_m_inline_imm_f32:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
65 ; GCN: buffer_load_dword [[VA:v[0-9]+]]
|
121
|
66 ; GCN: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
|
|
67 define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind {
|
120
|
68 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
69 %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
70 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
71
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
72 %a = load float, float addrspace(1)* %in.a.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
73
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
74 %mul = fmul float 4.0, %a
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
75 %madak = fadd float %mul, 10.0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
76 store float %madak, float addrspace(1)* %out.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
77 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
78 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
79
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
80 ; Make sure nothing weird happens with a value that is also allowed as
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
81 ; an inline immediate.
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
82
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
83 ; GCN-LABEL: {{^}}madak_inline_imm_f32:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
84 ; GCN: buffer_load_dword [[VA:v[0-9]+]]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
85 ; GCN: buffer_load_dword [[VB:v[0-9]+]]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
86 ; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
|
121
|
87 define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
|
120
|
88 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
89 %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
90 %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
91 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
92
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
93 %a = load float, float addrspace(1)* %in.a.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
94 %b = load float, float addrspace(1)* %in.b.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
95
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
96 %mul = fmul float %a, %b
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
97 %madak = fadd float %mul, 4.0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
98 store float %madak, float addrspace(1)* %out.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
99 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
100 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
101
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
102 ; We can't use an SGPR when forming madak
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
103 ; GCN-LABEL: {{^}}s_v_madak_f32:
|
120
|
104 ; GCN-DAG: s_load_dword [[SB:s[0-9]+]]
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
105 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
106 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
107 ; GCN-NOT: v_madak_f32
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
108 ; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
|
121
|
109 define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) nounwind {
|
120
|
110 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
111 %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
112 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
113
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
114 %a = load float, float addrspace(1)* %in.a.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
115
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
116 %mul = fmul float %a, %b
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
117 %madak = fadd float %mul, 10.0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
118 store float %madak, float addrspace(1)* %out.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
119 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
120 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
121
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
122 ; GCN-LABEL: @v_s_madak_f32
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
123 ; GCN-DAG: s_load_dword [[SB:s[0-9]+]]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
124 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
125 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
126 ; GCN-NOT: v_madak_f32
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
127 ; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
|
121
|
128 define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) nounwind {
|
120
|
129 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
130 %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
131 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
132
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
133 %b = load float, float addrspace(1)* %in.b.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
134
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
135 %mul = fmul float %a, %b
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
136 %madak = fadd float %mul, 10.0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
137 store float %madak, float addrspace(1)* %out.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
138 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
139 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
140
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
141 ; GCN-LABEL: {{^}}s_s_madak_f32:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
142 ; GCN-NOT: v_madak_f32
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
143 ; GCN: v_mac_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
|
121
|
144 define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
145 %mul = fmul float %a, %b
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
146 %madak = fadd float %mul, 10.0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
147 store float %madak, float addrspace(1)* %out, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
148 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
149 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
150
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
151 ; GCN-LABEL: {{^}}no_madak_src0_modifier_f32:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
152 ; GCN: buffer_load_dword [[VA:v[0-9]+]]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
153 ; GCN: buffer_load_dword [[VB:v[0-9]+]]
|
121
|
154 ; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
155 ; GCN: s_endpgm
|
121
|
156 define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
|
120
|
157 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
158 %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
159 %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
160 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
161
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
162 %a = load float, float addrspace(1)* %in.a.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
163 %b = load float, float addrspace(1)* %in.b.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
164
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
165 %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
166
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
167 %mul = fmul float %a.fabs, %b
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
168 %madak = fadd float %mul, 10.0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
169 store float %madak, float addrspace(1)* %out.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
170 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
171 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
172
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
173 ; GCN-LABEL: {{^}}no_madak_src1_modifier_f32:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
174 ; GCN: buffer_load_dword [[VA:v[0-9]+]]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
175 ; GCN: buffer_load_dword [[VB:v[0-9]+]]
|
121
|
176 ; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}}
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
177 ; GCN: s_endpgm
|
121
|
178 define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
|
120
|
179 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
95
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
180 %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
181 %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
182 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
183
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
184 %a = load float, float addrspace(1)* %in.a.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
185 %b = load float, float addrspace(1)* %in.b.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
186
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
187 %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
188
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
189 %mul = fmul float %a, %b.fabs
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
190 %madak = fadd float %mul, 10.0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
191 store float %madak, float addrspace(1)* %out.gep, align 4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
192 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
193 }
|
120
|
194
|
|
195 ; SIFoldOperands should not fold the SGPR copy into the instruction
|
|
196 ; because the implicit immediate already uses the constant bus.
|
|
197 ; GCN-LABEL: {{^}}madak_constant_bus_violation:
|
|
198 ; GCN: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}}
|
|
199 ; GCN: v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
|
|
200 ; GCN: buffer_load_dword [[VGPR:v[0-9]+]]
|
121
|
201 ; GCN: v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
|
|
202 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[MADAK]], [[VGPR]]
|
120
|
203 ; GCN: buffer_store_dword [[MUL]]
|
121
|
204 define amdgpu_kernel void @madak_constant_bus_violation(i32 %arg1, float %sgpr0, float %sgpr1) #0 {
|
120
|
205 bb:
|
|
206 %tmp = icmp eq i32 %arg1, 0
|
|
207 br i1 %tmp, label %bb3, label %bb4
|
|
208
|
|
209 bb3:
|
|
210 store volatile float 0.0, float addrspace(1)* undef
|
|
211 br label %bb4
|
|
212
|
|
213 bb4:
|
|
214 %vgpr = load volatile float, float addrspace(1)* undef
|
|
215 %tmp0 = fmul float %sgpr0, 0.5
|
|
216 %tmp1 = fadd float %tmp0, 42.0
|
|
217 %tmp2 = fmul float %tmp1, %vgpr
|
|
218 store volatile float %tmp2, float addrspace(1)* undef, align 4
|
|
219 ret void
|
|
220 }
|
|
221
|
|
222 attributes #0 = { nounwind}
|