Mercurial > hg > CbC > CbC_llvm
comparison test/CodeGen/AMDGPU/madmk.ll @ 147:c2174574ed3a
LLVM 10
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 14 Aug 2019 16:55:33 +0900 |
parents | 803732b1fca8 |
children |
comparison
equal
deleted
inserted
replaced
134:3a76565eade5 | 147:c2174574ed3a |
---|---|
29 | 29 |
30 ; GCN-LABEL: {{^}}madmk_2_use_f32: | 30 ; GCN-LABEL: {{^}}madmk_2_use_f32: |
31 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} | 31 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} |
32 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 | 32 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 |
33 ; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 | 33 ; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 |
34 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 | 34 ; GCN-DAG: s_mov_b32 [[SK:s[0-9]+]], 0x41200000 |
35 ; GCN-DAG: v_mac_f32_e32 [[VB]], [[VA]], [[VK]] | 35 ; GCN-DAG: v_mac_f32_e32 [[VB]], [[SK]], [[VA]] |
36 ; GCN-DAG: v_mac_f32_e32 [[VC]], [[VA]], [[VK]] | 36 ; GCN-DAG: v_mac_f32_e32 [[VC]], [[SK]], [[VA]] |
37 ; GCN: s_endpgm | 37 ; GCN: s_endpgm |
38 define amdgpu_kernel void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { | 38 define amdgpu_kernel void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { |
39 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone | 39 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone |
40 | 40 |
41 %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid | 41 %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid |
81 | 81 |
82 ; GCN-LABEL: {{^}}s_s_madmk_f32: | 82 ; GCN-LABEL: {{^}}s_s_madmk_f32: |
83 ; GCN-NOT: v_madmk_f32 | 83 ; GCN-NOT: v_madmk_f32 |
84 ; GCN: v_mac_f32_e32 | 84 ; GCN: v_mac_f32_e32 |
85 ; GCN: s_endpgm | 85 ; GCN: s_endpgm |
86 define amdgpu_kernel void @s_s_madmk_f32(float addrspace(1)* noalias %out, float %a, float %b) nounwind { | 86 define amdgpu_kernel void @s_s_madmk_f32(float addrspace(1)* noalias %out, [8 x i32], float %a, [8 x i32], float %b) nounwind { |
87 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone | 87 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone |
88 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid | 88 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid |
89 | 89 |
90 %mul = fmul float %a, 10.0 | 90 %mul = fmul float %a, 10.0 |
91 %madmk = fadd float %mul, %b | 91 %madmk = fadd float %mul, %b |
92 store float %madmk, float addrspace(1)* %out.gep, align 4 | 92 store float %madmk, float addrspace(1)* %out.gep, align 4 |
93 ret void | 93 ret void |
94 } | 94 } |
95 | 95 |
96 ; GCN-LABEL: {{^}}v_s_madmk_f32: | 96 ; GCN-LABEL: {{^}}v_s_madmk_f32: |
97 ; GCN-NOT: v_madmk_f32 | 97 ; GCN: s_load_dword [[SREG:s[0-9]+]] |
98 ; GCN: v_mad_f32 | 98 ; GCN: buffer_load_dword [[VREG1:v[0-9]+]] |
99 ; GCN: v_mov_b32_e32 [[VREG2:v[0-9]+]], [[SREG]] | |
100 ; GCN: v_mac_f32_e32 [[VREG2]], 0x41200000, [[VREG1]] | |
99 ; GCN: s_endpgm | 101 ; GCN: s_endpgm |
100 define amdgpu_kernel void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) nounwind { | 102 define amdgpu_kernel void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) nounwind { |
101 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone | 103 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone |
102 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid | 104 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid |
103 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid | 105 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid |
126 } | 128 } |
127 | 129 |
128 ; GCN-LABEL: {{^}}no_madmk_src0_modifier_f32: | 130 ; GCN-LABEL: {{^}}no_madmk_src0_modifier_f32: |
129 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} | 131 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} |
130 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 | 132 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 |
131 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 | 133 ; GCN-DAG: s_mov_b32 [[SK:s[0-9]+]], 0x41200000 |
132 ; GCN: v_mad_f32 {{v[0-9]+}}, |[[VA]]|, [[VK]], [[VB]] | 134 ; GCN: v_mad_f32 {{v[0-9]+}}, |[[VA]]|, [[SK]], [[VB]] |
133 define amdgpu_kernel void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { | 135 define amdgpu_kernel void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { |
134 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone | 136 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone |
135 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid | 137 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid |
136 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 | 138 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 |
137 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid | 139 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid |
148 } | 150 } |
149 | 151 |
150 ; GCN-LABEL: {{^}}no_madmk_src2_modifier_f32: | 152 ; GCN-LABEL: {{^}}no_madmk_src2_modifier_f32: |
151 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} | 153 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} |
152 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 | 154 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 |
153 ; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, |{{[sv][0-9]+}}| | 155 ; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{[sv][0-9]+}}, |{{v[0-9]+}}| |
154 define amdgpu_kernel void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { | 156 define amdgpu_kernel void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { |
155 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone | 157 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone |
156 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid | 158 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid |
157 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 | 159 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 |
158 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid | 160 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid |
168 ret void | 170 ret void |
169 } | 171 } |
170 | 172 |
171 ; GCN-LABEL: {{^}}madmk_add_inline_imm_f32: | 173 ; GCN-LABEL: {{^}}madmk_add_inline_imm_f32: |
172 ; GCN: buffer_load_dword [[A:v[0-9]+]] | 174 ; GCN: buffer_load_dword [[A:v[0-9]+]] |
173 ; GCN: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 | 175 ; GCN: s_mov_b32 [[SK:s[0-9]+]], 0x41200000 |
174 ; GCN: v_mad_f32 {{v[0-9]+}}, [[A]], [[VK]], 2.0 | 176 ; GCN: v_mad_f32 {{v[0-9]+}}, [[A]], [[SK]], 2.0 |
175 define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { | 177 define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { |
176 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone | 178 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone |
177 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid | 179 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid |
178 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid | 180 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid |
179 | 181 |
184 store float %madmk, float addrspace(1)* %out.gep, align 4 | 186 store float %madmk, float addrspace(1)* %out.gep, align 4 |
185 ret void | 187 ret void |
186 } | 188 } |
187 | 189 |
188 ; SI-LABEL: {{^}}kill_madmk_verifier_error: | 190 ; SI-LABEL: {{^}}kill_madmk_verifier_error: |
191 ; SI: s_or_b64 | |
189 ; SI: s_xor_b64 | 192 ; SI: s_xor_b64 |
190 ; SI: v_mac_f32_e32 {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}} | 193 ; SI: v_mac_f32_e32 {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}} |
191 ; SI: s_or_b64 | |
192 define amdgpu_kernel void @kill_madmk_verifier_error() nounwind { | 194 define amdgpu_kernel void @kill_madmk_verifier_error() nounwind { |
193 bb: | 195 bb: |
194 br label %bb2 | 196 br label %bb2 |
195 | 197 |
196 bb1: ; preds = %bb2 | 198 bb1: ; preds = %bb2 |
203 %tmp3 = fsub float %f_tid, %tmp | 205 %tmp3 = fsub float %f_tid, %tmp |
204 %tmp5 = fcmp oeq float %tmp3, 1.000000e+04 | 206 %tmp5 = fcmp oeq float %tmp3, 1.000000e+04 |
205 br i1 %tmp5, label %bb1, label %bb6 | 207 br i1 %tmp5, label %bb1, label %bb6 |
206 | 208 |
207 bb6: ; preds = %bb2 | 209 bb6: ; preds = %bb2 |
208 %tmp4 = fmul float %tmp, undef | 210 %tmp7 = fmul float %tmp, 0x40E55DD180000000 |
209 %tmp7 = fmul float %tmp4, 0x40E55DD180000000 | 211 %tmp8 = fadd float %tmp7, %tmp |
210 %tmp8 = fadd float %tmp7, undef | |
211 br label %bb2 | 212 br label %bb2 |
212 } | 213 } |
213 | 214 |
214 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1 | 215 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1 |
215 | 216 |