annotate test/CodeGen/AMDGPU/mad-combine.ll @ 128:c347d3398279 default tip

fix
author mir3636
date Wed, 06 Dec 2017 14:37:17 +0900
parents 803732b1fca8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 ; Make sure we still form mad even when unsafe math or fp-contract is allowed instead of fma.
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
3 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
4 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
5 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
6
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 ; Make sure we don't form mad with denormals
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
8 ; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
9 ; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
10
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
11 declare i32 @llvm.amdgcn.workitem.id.x() #0
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
12 declare float @llvm.fabs.f32(float) #0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
13 declare float @llvm.fma.f32(float, float, float) #0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 declare float @llvm.fmuladd.f32(float, float, float) #0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
15
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
16 ; (fadd (fmul x, y), z) -> (fma x, y, z)
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
17 ; FUNC-LABEL: {{^}}combine_to_mad_f32_0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
18 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
19 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
20 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
21
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
22 ; SI-STD: v_mac_f32_e32 [[C]], [[A]], [[B]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
23
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
24 ; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
25
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
26 ; SI-DENORM-SLOWFMAF-NOT: v_fma
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
27 ; SI-DENORM-SLOWFMAF-NOT: v_mad
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
28
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
29 ; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
30 ; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
31
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
32 ; SI-DENORM: buffer_store_dword [[RESULT]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
33 ; SI-STD: buffer_store_dword [[C]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
34 define amdgpu_kernel void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
35 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
36 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
37 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
38 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
39 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
40
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
41 %a = load volatile float, float addrspace(1)* %gep.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
42 %b = load volatile float, float addrspace(1)* %gep.1
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
43 %c = load volatile float, float addrspace(1)* %gep.2
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
44
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
45 %mul = fmul float %a, %b
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
46 %fma = fadd float %mul, %c
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
47 store float %fma, float addrspace(1)* %gep.out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
48 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
49 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
50
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
51 ; (fadd (fmul x, y), z) -> (fma x, y, z)
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
52 ; FUNC-LABEL: {{^}}combine_to_mad_f32_0_2use:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
53 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
55 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
56 ; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
57
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
58 ; SI-STD-DAG: v_mac_f32_e32 [[C]], [[A]], [[B]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
59 ; SI-STD-DAG: v_mac_f32_e32 [[D]], [[A]], [[B]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
60
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
61 ; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
62 ; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
63
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
64 ; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
65 ; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
66 ; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
67
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
68 ; SI-DENORM-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
69 ; SI-DENORM-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
70 ; SI-STD-DAG: buffer_store_dword [[C]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
71 ; SI-STD-DAG: buffer_store_dword [[D]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
72 ; SI: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
73 define amdgpu_kernel void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
74 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
75 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
76 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
77 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
78 %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
79 %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
80 %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
81
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
82 %a = load volatile float, float addrspace(1)* %gep.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
83 %b = load volatile float, float addrspace(1)* %gep.1
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
84 %c = load volatile float, float addrspace(1)* %gep.2
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
85 %d = load volatile float, float addrspace(1)* %gep.3
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
86
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
87 %mul = fmul float %a, %b
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
88 %fma0 = fadd float %mul, %c
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
89 %fma1 = fadd float %mul, %d
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
90
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
91 store volatile float %fma0, float addrspace(1)* %gep.out.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
92 store volatile float %fma1, float addrspace(1)* %gep.out.1
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
93 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
94 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
96 ; (fadd x, (fmul y, z)) -> (fma y, z, x)
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
97 ; FUNC-LABEL: {{^}}combine_to_mad_f32_1:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
98 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
99 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
100 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
101
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
102 ; SI-STD: v_mac_f32_e32 [[C]], [[A]], [[B]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
103 ; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
104
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
105 ; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
106 ; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
107
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
108 ; SI-DENORM: buffer_store_dword [[RESULT]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
109 ; SI-STD: buffer_store_dword [[C]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
110 define amdgpu_kernel void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
111 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
112 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
113 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
114 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
115 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
116
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
117 %a = load volatile float, float addrspace(1)* %gep.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
118 %b = load volatile float, float addrspace(1)* %gep.1
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
119 %c = load volatile float, float addrspace(1)* %gep.2
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
120
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
121 %mul = fmul float %a, %b
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
122 %fma = fadd float %c, %mul
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
123 store float %fma, float addrspace(1)* %gep.out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
124 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
125 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
126
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
127 ; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
128 ; FUNC-LABEL: {{^}}combine_to_mad_fsub_0_f32:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
129 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
130 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
131 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
132
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
133 ; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
134 ; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
135
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
136 ; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
137 ; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
138
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
139 ; SI: buffer_store_dword [[RESULT]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
140 define amdgpu_kernel void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
141 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
142 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
143 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
144 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
145 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
146
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
147 %a = load volatile float, float addrspace(1)* %gep.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
148 %b = load volatile float, float addrspace(1)* %gep.1
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
149 %c = load volatile float, float addrspace(1)* %gep.2
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
150
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
151 %mul = fmul float %a, %b
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
152 %fma = fsub float %mul, %c
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
153 store float %fma, float addrspace(1)* %gep.out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
154 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
155 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
156
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
157 ; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
158 ; FUNC-LABEL: {{^}}combine_to_mad_fsub_0_f32_2use:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
159 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
160 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
161 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
162 ; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
163
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
164 ; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
165 ; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
166
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
167 ; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
168 ; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
169
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
170 ; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
171 ; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
172 ; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
173
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
174 ; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
175 ; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
176 ; SI: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
177 define amdgpu_kernel void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
178 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
179 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
180 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
181 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
182 %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
183 %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
184 %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
185
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
186 %a = load volatile float, float addrspace(1)* %gep.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
187 %b = load volatile float, float addrspace(1)* %gep.1
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
188 %c = load volatile float, float addrspace(1)* %gep.2
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
189 %d = load volatile float, float addrspace(1)* %gep.3
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
190
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
191 %mul = fmul float %a, %b
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
192 %fma0 = fsub float %mul, %c
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
193 %fma1 = fsub float %mul, %d
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
194 store volatile float %fma0, float addrspace(1)* %gep.out.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
195 store volatile float %fma1, float addrspace(1)* %gep.out.1
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
196 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
197 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
198
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
199 ; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
200 ; FUNC-LABEL: {{^}}combine_to_mad_fsub_1_f32:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
201 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
202 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
203 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
204
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
205 ; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
206 ; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
207
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
208 ; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
209 ; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
210
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
211 ; SI: buffer_store_dword [[RESULT]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
212 define amdgpu_kernel void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
213 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
214 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
215 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
216 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
217 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
218
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
219 %a = load volatile float, float addrspace(1)* %gep.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
220 %b = load volatile float, float addrspace(1)* %gep.1
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
221 %c = load volatile float, float addrspace(1)* %gep.2
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
222
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
223 %mul = fmul float %a, %b
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
224 %fma = fsub float %c, %mul
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
225 store float %fma, float addrspace(1)* %gep.out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
226 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
227 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
228
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
229 ; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
230 ; FUNC-LABEL: {{^}}combine_to_mad_fsub_1_f32_2use:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
231 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
232 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
233 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
234
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
235 ; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
236 ; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
237
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
238 ; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
239 ; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
240
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
241 ; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
242 ; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
243 ; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
244
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
245 ; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
246 ; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
247 ; SI: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
248 define amdgpu_kernel void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
249 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
250 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
251 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
252 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
253 %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
254 %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
255 %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
256
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
257 %a = load volatile float, float addrspace(1)* %gep.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
258 %b = load volatile float, float addrspace(1)* %gep.1
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
259 %c = load volatile float, float addrspace(1)* %gep.2
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
260 %d = load volatile float, float addrspace(1)* %gep.3
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
261
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
262 %mul = fmul float %a, %b
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
263 %fma0 = fsub float %c, %mul
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
264 %fma1 = fsub float %d, %mul
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
265 store volatile float %fma0, float addrspace(1)* %gep.out.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
266 store volatile float %fma1, float addrspace(1)* %gep.out.1
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
267 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
268 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
269
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
270 ; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
271 ; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
272 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
273 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
274 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
275
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
276 ; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
277
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
278 ; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
279
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
280 ; SI-DENORM-SLOWFMAF: v_mul_f32_e64 [[TMP:v[0-9]+]], [[A]], -[[B]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
281 ; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
282
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
283 ; SI: buffer_store_dword [[RESULT]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
284 define amdgpu_kernel void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
285 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
286 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
287 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
288 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
289 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
290
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
291 %a = load volatile float, float addrspace(1)* %gep.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
292 %b = load volatile float, float addrspace(1)* %gep.1
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
293 %c = load volatile float, float addrspace(1)* %gep.2
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
294
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
295 %mul = fmul float %a, %b
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
296 %mul.neg = fsub float -0.0, %mul
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
297 %fma = fsub float %mul.neg, %c
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
298
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
299 store float %fma, float addrspace(1)* %gep.out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
300 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
301 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
302
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
303 ; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
304 ; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32_2uses_neg:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
305 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
306 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
307 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
308
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
309 ; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], -[[B]], -[[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
310 ; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], -[[B]], -[[D]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
311
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
312 ; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
313 ; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
314
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
315 ; SI-DENORM-SLOWFMAF: v_mul_f32_e64 [[TMP:v[0-9]+]], [[A]], -[[B]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
316 ; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
317 ; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
318
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
319 ; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
320 ; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
321 ; SI: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
322 define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
323 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
324 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
325 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
326 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
327 %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
328 %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
329 %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
330
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
331 %a = load volatile float, float addrspace(1)* %gep.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
332 %b = load volatile float, float addrspace(1)* %gep.1
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
333 %c = load volatile float, float addrspace(1)* %gep.2
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
334 %d = load volatile float, float addrspace(1)* %gep.3
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
335
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
336 %mul = fmul float %a, %b
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
337 %mul.neg = fsub float -0.0, %mul
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
338 %fma0 = fsub float %mul.neg, %c
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
339 %fma1 = fsub float %mul.neg, %d
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
340
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
341 store volatile float %fma0, float addrspace(1)* %gep.out.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
342 store volatile float %fma1, float addrspace(1)* %gep.out.1
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
343 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
344 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
345
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
346 ; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
347 ; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32_2uses_mul:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
348 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
349 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
350 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
351
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
352 ; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
353 ; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
354
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
355 ; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
356 ; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
357
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
358 ; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
359 ; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT0:v[0-9]+]], -[[TMP]], [[C]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
360 ; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
361
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
362 ; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
363 ; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
364 ; SI: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
365 define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
366 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
367 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
368 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
369 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
370 %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
371 %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
372 %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
373
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
374 %a = load volatile float, float addrspace(1)* %gep.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
375 %b = load volatile float, float addrspace(1)* %gep.1
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
376 %c = load volatile float, float addrspace(1)* %gep.2
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
377 %d = load volatile float, float addrspace(1)* %gep.3
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
378
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
379 %mul = fmul float %a, %b
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
380 %mul.neg = fsub float -0.0, %mul
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
381 %fma0 = fsub float %mul.neg, %c
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
382 %fma1 = fsub float %mul, %d
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
383
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
384 store volatile float %fma0, float addrspace(1)* %gep.out.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
385 store volatile float %fma1, float addrspace(1)* %gep.out.1
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
386 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
387 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
388
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
389 ; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, (fneg z)))
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
390
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
391 ; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_0_f32:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
392 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
393 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
394 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
395 ; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
396 ; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
397
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
398 ; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
399 ; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
400 ; SI-STD: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[C]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
401
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
402 ; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
403 ; SI-DENORM: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
404 ; SI-DENORM: v_sub_f32_e32 [[RESULT1:v[0-9]+]], [[TMP1]], [[C]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
405
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
406 ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
407 define amdgpu_kernel void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
408 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
409 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
410 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
411 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
412 %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
413 %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
414 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
415
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
416 %x = load volatile float, float addrspace(1)* %gep.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
417 %y = load volatile float, float addrspace(1)* %gep.1
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
418 %z = load volatile float, float addrspace(1)* %gep.2
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
419 %u = load volatile float, float addrspace(1)* %gep.3
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
420 %v = load volatile float, float addrspace(1)* %gep.4
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
421
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
422 %tmp0 = fmul float %u, %v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
423 %tmp1 = call float @llvm.fma.f32(float %x, float %y, float %tmp0) #0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
424 %tmp2 = fsub float %tmp1, %z
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
425
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
426 store float %tmp2, float addrspace(1)* %gep.out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
427 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
428 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
429
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
430 ; fold (fsub x, (fma y, z, (fmul u, v)))
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
431 ; -> (fma (fneg y), z, (fma (fneg u), v, x))
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
432
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
433 ; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_1_f32:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
434 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
435 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
436 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
437 ; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
438 ; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
439
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
440 ; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
441 ; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
442 ; SI-STD: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[A]], [[TMP1]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
443
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
444 ; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
445 ; SI-DENORM: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
446 ; SI-DENORM: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[A]], [[TMP1]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
447
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
448 ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
449 ; SI: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
450 define amdgpu_kernel void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
451 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
452 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
453 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
454 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
455 %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
456 %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
457 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
458
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
459 %x = load volatile float, float addrspace(1)* %gep.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
460 %y = load volatile float, float addrspace(1)* %gep.1
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
461 %z = load volatile float, float addrspace(1)* %gep.2
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
462 %u = load volatile float, float addrspace(1)* %gep.3
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
463 %v = load volatile float, float addrspace(1)* %gep.4
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
464
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
465 %tmp0 = fmul float %u, %v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
466 %tmp1 = call float @llvm.fma.f32(float %y, float %z, float %tmp0) #0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
467 %tmp2 = fsub float %x, %tmp1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
468
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
469 store float %tmp2, float addrspace(1)* %gep.out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
470 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
471 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
472
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
473 ; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, (fneg z)))
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
474
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
475 ; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_2_f32:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
476 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
477 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
478 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
479 ; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
480 ; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
481
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
482 ; SI-STD-SAFE: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
483 ; SI-STD-SAFE: v_mac_f32_e32 [[TMP0]], [[A]], [[B]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
484 ; SI-STD-SAFE: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP0]], [[C]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
485
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
486 ; SI-STD-UNSAFE: v_mad_f32 [[RESULT:v[0-9]+]], [[D]], [[E]], -[[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
487 ; SI-STD-UNSAFE: v_mac_f32_e32 [[RESULT]], [[A]], [[B]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
488
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
489 ; SI-DENORM-FASTFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
490 ; SI-DENORM-FASTFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
491 ; SI-DENORM-FASTFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[C]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
492
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
493 ; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
494 ; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[A]], [[B]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
495 ; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP1]], [[TMP0]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
496 ; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP2]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
497
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
498 ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
499 ; SI: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
500 define amdgpu_kernel void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
501 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
502 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
503 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
504 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
505 %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
506 %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
507 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
508
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
509 %x = load volatile float, float addrspace(1)* %gep.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
510 %y = load volatile float, float addrspace(1)* %gep.1
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
511 %z = load volatile float, float addrspace(1)* %gep.2
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
512 %u = load volatile float, float addrspace(1)* %gep.3
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
513 %v = load volatile float, float addrspace(1)* %gep.4
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
514
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
515 %tmp0 = fmul float %u, %v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
516 %tmp1 = call float @llvm.fmuladd.f32(float %x, float %y, float %tmp0) #0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
517 %tmp2 = fsub float %tmp1, %z
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
518
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
519 store float %tmp2, float addrspace(1)* %gep.out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
520 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
521 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
522
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
523 ; fold (fsub x, (fmuladd y, z, (fmul u, v)))
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
524 ; -> (fmuladd (fneg y), z, (fmuladd (fneg u), v, x))
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
525
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
526 ; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_3_f32:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
527 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
528 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
529 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
530 ; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
531 ; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
532
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
533 ; SI-STD-SAFE: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
534 ; SI-STD-SAFE: v_mac_f32_e32 [[TMP0]], [[B]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
535 ; SI-STD-SAFE: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[A]], [[TMP0]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
536
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
537 ; SI-STD-UNSAFE: v_mad_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
538 ; SI-STD-UNSAFE: v_mad_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
539
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
540 ; SI-DENORM-FASTFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
541 ; SI-DENORM-FASTFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
542 ; SI-DENORM-FASTFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[A]], [[TMP1]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
543
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
544 ; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
545 ; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[B]], [[C]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
546 ; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP1]], [[TMP0]]
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
547 ; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[A]], [[TMP2]]
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
548
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
549 ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
550 ; SI: s_endpgm
121
803732b1fca8 LLVM 5.0
kono
parents: 120
diff changeset
551 define amdgpu_kernel void @aggressive_combine_to_mad_fsub_3_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
552 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
553 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
554 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
555 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
556 %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
557 %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
558 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
559
120
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
560 %x = load volatile float, float addrspace(1)* %gep.0
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
561 %y = load volatile float, float addrspace(1)* %gep.1
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
562 %z = load volatile float, float addrspace(1)* %gep.2
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
563 %u = load volatile float, float addrspace(1)* %gep.3
1172e4bd9c6f update 4.0.0
mir3636
parents: 95
diff changeset
564 %v = load volatile float, float addrspace(1)* %gep.4
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
565
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
566 %tmp0 = fmul float %u, %v
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
567 %tmp1 = call float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) #0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
568 %tmp2 = fsub float %x, %tmp1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
569
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
570 store float %tmp2, float addrspace(1)* %gep.out
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
571 ret void
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
572 }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
573
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
574 attributes #0 = { nounwind readnone }
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff changeset
575 attributes #1 = { nounwind }