; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX678,HAS-ATOMICS %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,HAS-ATOMICS %s
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX678,NO-ATOMICS %s
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX678,NO-ATOMICS %s
|
|
5
|
|
; Returning fadd of the constant 4.0 through a float addrspace(3) pointer.
; The HAS-ATOMICS runs (tonga, gfx900) expect a single ds_add_rtn_f32 whose
; addend is materialised with v_mov_b32. The NO-ATOMICS runs (tahiti, hawaii)
; expect a read / add / compare-store sequence closed by a loop branch.
; GCN-LABEL: {{^}}lds_atomic_fadd_ret_f32:
; GFX678-DAG: s_mov_b32 m0
; GFX9-NOT: m0
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 4.0
; HAS-ATOMICS: ds_add_rtn_f32 v0, v0, [[K]]

; NO-ATOMICS: ds_read_b32
; NO-ATOMICS: v_add_f32
; NO-ATOMICS: ds_cmpst_rtn_b32
; NO-ATOMICS: s_cbranch_execnz
define float @lds_atomic_fadd_ret_f32(float addrspace(3)* %ptr) nounwind {
  ; %old is the value the atomic yields; it is handed straight back to the caller.
  %old = atomicrmw fadd float addrspace(3)* %ptr, float 4.000000e+00 seq_cst
  ret float %old
}
|
|
20
|
|
; fadd of the constant 4.0 through a float addrspace(3) pointer where the
; result of the atomic is never used.
; The HAS-ATOMICS runs (tonga, gfx900) expect the non-returning ds_add_f32.
; The NO-ATOMICS runs (tahiti, hawaii) are checked for the same
; read / add / compare-store loop as the returning variant, so that the
; fallback path is covered for the unused-result case as well.
; GCN-LABEL: {{^}}lds_atomic_fadd_noret_f32:
; GFX678-DAG: s_mov_b32 m0
; GFX9-NOT: m0
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 4.0
; HAS-ATOMICS: ds_add_f32 v0, [[K]]

; NO-ATOMICS: ds_read_b32
; NO-ATOMICS: v_add_f32
; NO-ATOMICS: ds_cmpst_rtn_b32
; NO-ATOMICS: s_cbranch_execnz
define void @lds_atomic_fadd_noret_f32(float addrspace(3)* %ptr) nounwind {
  ; %result is intentionally left unused; only the memory update matters here.
  %result = atomicrmw fadd float addrspace(3)* %ptr, float 4.0 seq_cst
  ret void
}
|
|
30
|
|
; Kernel issuing three seq_cst LDS fadds:
;   1. 42.0 added at address (idx + 4) << 3, result kept,
;   2. 42.0 added at address (idx + 4) << 4, result dropped,
;   3. the result of (1) added at %ptrf, result stored to %out.
; The checks expect the constant 0x42280000 to be shared by the first two
; atomics and the destination of the first ds_add_rtn_f32 to feed the last one.
; NOTE(review): the expected offset:32 / offset:64 presumably come from folding
; the "+ 4" through the shifts (4 << 3 = 32, 4 << 4 = 64) - confirm.
; GCN-LABEL: {{^}}lds_ds_fadd:
; VI-DAG: s_mov_b32 m0
; GFX9-NOT: m0
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
; HAS-ATOMICS: s_waitcnt vmcnt(0) lgkmcnt(0)
; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
define amdgpu_kernel void @lds_ds_fadd(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
  ; Common biased index used by both computed addresses.
  %biased = add nuw i32 %idx, 4
  %addr.x8 = shl i32 %biased, 3
  %addr.x16 = shl i32 %biased, 4
  ; The integer addresses are reinterpreted as LDS (addrspace 3) float pointers.
  %lds.x8 = inttoptr i32 %addr.x8 to float addrspace(3)*
  %lds.x16 = inttoptr i32 %addr.x16 to float addrspace(3)*
  %old.x8 = atomicrmw fadd float addrspace(3)* %lds.x8, float 4.200000e+01 seq_cst
  ; Result of the second atomic is never read.
  %old.x16 = atomicrmw fadd float addrspace(3)* %lds.x16, float 4.200000e+01 seq_cst
  ; The third atomic consumes the value returned by the first one.
  %old.f = atomicrmw fadd float addrspace(3)* %ptrf, float %old.x8 seq_cst
  store float %old.f, float addrspace(1)* %out
  ret void
}
|
|
51
|
|
; Same instruction sequence as @lds_ds_fadd, except that every atomicrmw
; carries syncscope("one-as"). The only difference in the expected output is
; the wait between the second and third atomic: s_waitcnt lgkmcnt(1) here
; instead of a wait on both vmcnt(0) and lgkmcnt(0).
; GCN-LABEL: {{^}}lds_ds_fadd_one_as:
; VI-DAG: s_mov_b32 m0
; GFX9-NOT: m0
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
; HAS-ATOMICS: s_waitcnt lgkmcnt(1)
; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
define amdgpu_kernel void @lds_ds_fadd_one_as(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
  ; Common biased index used by both computed addresses.
  %biased = add nuw i32 %idx, 4
  %addr.x8 = shl i32 %biased, 3
  %addr.x16 = shl i32 %biased, 4
  ; The integer addresses are reinterpreted as LDS (addrspace 3) float pointers.
  %lds.x8 = inttoptr i32 %addr.x8 to float addrspace(3)*
  %lds.x16 = inttoptr i32 %addr.x16 to float addrspace(3)*
  %old.x8 = atomicrmw fadd float addrspace(3)* %lds.x8, float 4.200000e+01 syncscope("one-as") seq_cst
  ; Result of the second atomic is never read.
  %old.x16 = atomicrmw fadd float addrspace(3)* %lds.x16, float 4.200000e+01 syncscope("one-as") seq_cst
  ; The third atomic consumes the value returned by the first one.
  %old.f = atomicrmw fadd float addrspace(3)* %ptrf, float %old.x8 syncscope("one-as") seq_cst
  store float %old.f, float addrspace(1)* %out
  ret void
}
|
|
72
|
|
; Returning fadd of the constant 4.0 on a double in LDS. Every run line shares
; the same expectation: a 64-bit read, v_add_f64, a 64-bit compare-store and a
; loop branch, i.e. no dedicated f64 add atomic is expected on any tested CPU.
; GCN-LABEL: {{^}}lds_atomic_fadd_ret_f64:
; GCN: ds_read_b64
; GCN: v_add_f64
; GCN: ds_cmpst_rtn_b64
; GCN: s_cbranch_execnz
define double @lds_atomic_fadd_ret_f64(double addrspace(3)* %ptr) nounwind {
  ; %prev is the value the atomic yields; it is returned unchanged.
  %prev = atomicrmw fadd double addrspace(3)* %ptr, double 4.000000e+00 seq_cst
  ret double %prev
}
|
|
82
|
|
; fadd of the constant 4.0 on a double in LDS with the result discarded.
; The expected sequence is the same on every run line: 64-bit read,
; v_add_f64, 64-bit compare-store, loop branch.
; GCN-LABEL: {{^}}lds_atomic_fadd_noret_f64:
; GCN: ds_read_b64
; GCN: v_add_f64
; GCN: ds_cmpst_rtn_b64
; GCN: s_cbranch_execnz
define void @lds_atomic_fadd_noret_f64(double addrspace(3)* %ptr) nounwind {
  ; The yielded value is deliberately not used.
  %unused = atomicrmw fadd double addrspace(3)* %ptr, double 4.000000e+00 seq_cst
  ret void
}
|
|
92
|
|
; Returning fsub of %val from a float in LDS. All run lines expect the
; read / v_sub_f32 / compare-store sequence followed by a loop branch.
; GCN-LABEL: {{^}}lds_atomic_fsub_ret_f32:
; GCN: ds_read_b32
; GCN: v_sub_f32
; GCN: ds_cmpst_rtn_b32
; GCN: s_cbranch_execnz
define float @lds_atomic_fsub_ret_f32(float addrspace(3)* %ptr, float %val) nounwind {
  ; %prev is the value the atomic yields; it is returned unchanged.
  %prev = atomicrmw fsub float addrspace(3)* %ptr, float %val seq_cst
  ret float %prev
}
|
|
102
|
|
; fsub of %val from a float in LDS with the result discarded. All run lines
; expect the read / v_sub_f32 / compare-store sequence. The loop back-edge
; (s_cbranch_execnz) is checked too, matching the returning f32 variant and
; the f64 variants, so a sequence that lost its retry loop would be caught.
; GCN-LABEL: {{^}}lds_atomic_fsub_noret_f32:
; GCN: ds_read_b32
; GCN: v_sub_f32
; GCN: ds_cmpst_rtn_b32
; GCN: s_cbranch_execnz
define void @lds_atomic_fsub_noret_f32(float addrspace(3)* %ptr, float %val) nounwind {
  ; %result is intentionally left unused; only the memory update matters here.
  %result = atomicrmw fsub float addrspace(3)* %ptr, float %val seq_cst
  ret void
}
|
|
111
|
|
; Returning fsub of %val from a double in LDS. The subtraction is expected to
; show up as v_add_f64 with a negated third operand, between a 64-bit read and
; a 64-bit compare-store. The loop back-edge (s_cbranch_execnz) is checked as
; well, matching the non-returning f64 variant.
; GCN-LABEL: {{^}}lds_atomic_fsub_ret_f64:
; GCN: ds_read_b64
; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+:[0-9]+\]}}
; GCN: ds_cmpst_rtn_b64
; GCN: s_cbranch_execnz
define double @lds_atomic_fsub_ret_f64(double addrspace(3)* %ptr, double %val) nounwind {
  ; %result is the value the atomic yields; it is returned unchanged.
  %result = atomicrmw fsub double addrspace(3)* %ptr, double %val seq_cst
  ret double %result
}
|
|
121
|
|
; fsub of %val from a double in LDS with the result discarded. Expected on
; every run line: 64-bit read, v_add_f64 with a negated third operand,
; 64-bit compare-store, loop branch.
; GCN-LABEL: {{^}}lds_atomic_fsub_noret_f64:
; GCN: ds_read_b64
; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+:[0-9]+\]}}
; GCN: ds_cmpst_rtn_b64
; GCN: s_cbranch_execnz
define void @lds_atomic_fsub_noret_f64(double addrspace(3)* %ptr, double %val) nounwind {
  ; The yielded value is deliberately not used.
  %unused = atomicrmw fsub double addrspace(3)* %ptr, double %val seq_cst
  ret void
}
|