Mercurial > hg > CbC > CbC_llvm
comparison llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll @ 150:1d019706d866
LLVM10
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 15:10:13 +0900 |
parents | |
children | 2e18cbf3894f |
comparison
equal
deleted
inserted
replaced
147:c2174574ed3a | 150:1d019706d866 |
---|---|
; Codegen test for the llvm.amdgcn.udot2 intrinsic. llc is invoked with | |
; -march=amdgcn for three -mcpu values and its output is piped to FileCheck. | |
; The gfx906 run uses the GCN and GFX906 check prefixes; the gfx1011 and | |
; gfx1012 runs both use the GCN and GFX10 check prefixes. | |
1 ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906 | |
2 ; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 | |
3 ; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 | |
4 | |
; Intrinsic under test: takes two <2 x i16> operands, an i32 operand and an | |
; i1 flag named %clamp, and returns an i32. | |
5 declare i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 %clamp) | |
6 | |
; Clamp variant. The kernel loads both <2 x i16> operands and the i32 operand | |
; through addrspace(1) pointers, calls the intrinsic with its last (i1) argument | |
; set to 1, and stores the i32 result through %r. | |
; The patterns below expect a v_dot2_u32_u16 line that ends in "clamp". | |
; Under the GFX906 prefix the register operands are v, s, v, v; under the GFX10 | |
; prefix they are v, s, s, v. Register numbers are matched by regex, so only | |
; the register class letter and operand order are pinned. | |
7 ; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_clamp | |
8 ; GFX906: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} | |
9 ; GFX10: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} | |
10 define amdgpu_kernel void @test_llvm_amdgcn_udot2_clamp( | |
11 i32 addrspace(1)* %r, | |
12 <2 x i16> addrspace(1)* %a, | |
13 <2 x i16> addrspace(1)* %b, | |
14 i32 addrspace(1)* %c) { | |
15 entry: | |
16 %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a | |
17 %b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b | |
18 %c.val = load i32, i32 addrspace(1)* %c | |
; The trailing i1 1 is the argument declared as %clamp on the intrinsic. | |
19 %r.val = call i32 @llvm.amdgcn.udot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val, i1 1) | |
20 store i32 %r.val, i32 addrspace(1)* %r | |
21 ret void | |
22 } | |
23 | |
; No-clamp variant. Same kernel shape as the clamp test (three addrspace(1) | |
; loads, one intrinsic call, one store through %r), but the last (i1) argument | |
; of the call is 0. | |
; The patterns below anchor end-of-line directly after the final register | |
; operand, so nothing (such as a "clamp" modifier) may follow it on that line. | |
; Under the GFX906 prefix the register operands are v, s, v, v; under the GFX10 | |
; prefix they are v, s, s, v. | |
24 ; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_no_clamp | |
25 ; GFX906: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}} | |
26 ; GFX10: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} | |
27 define amdgpu_kernel void @test_llvm_amdgcn_udot2_no_clamp( | |
28 i32 addrspace(1)* %r, | |
29 <2 x i16> addrspace(1)* %a, | |
30 <2 x i16> addrspace(1)* %b, | |
31 i32 addrspace(1)* %c) { | |
32 entry: | |
33 %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a | |
34 %b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b | |
35 %c.val = load i32, i32 addrspace(1)* %c | |
; The trailing i1 0 is the argument declared as %clamp on the intrinsic. | |
36 %r.val = call i32 @llvm.amdgcn.udot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val, i1 0) | |
37 store i32 %r.val, i32 addrspace(1)* %r | |
38 ret void | |
39 } | |