; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -enable-ipra -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN %s
3
|
|
; Kernels are not called, so there is no call preserved mask.
; GCN-LABEL: {{^}}kernel:
; GCN: flat_store_dword
; Trivial uncalled kernel: just stores a constant through its pointer argument.
define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) #0 {
entry:
  store i32 0, i32 addrspace(1)* %out
  ret void
}
|
|
12
|
|
; GCN-LABEL: {{^}}func:
; GCN: ; NumVgprs: 8
; Callee that clobbers exactly v0-v7 via an inline-asm clobber list, so the
; highest VGPR it touches is v7 (hence NumVgprs: 8). With IPRA enabled, callers
; can rely on every register above v7 surviving a call to this function.
define void @func() #1 {
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
  ret void
}
|
|
19
|
|
; GCN-LABEL: {{^}}kernel_call:
; No spill/reload or readlane/writelane traffic should appear around the call:
; IPRA lets the caller see that @func clobbers only v0-v7, so the value live
; across the call can sit in v8 (the first register @func does not touch).
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8

; GCN: ; NumSgprs: 37
; GCN: ; NumVgprs: 9
define amdgpu_kernel void @kernel_call() #0 {
  ; Volatile load/store pair keeps %vgpr live across the call so a register
  ; must hold it through s_swappc_b64.
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  tail call void @func()
  store volatile i32 %vgpr, i32 addrspace(1)* undef
  ret void
}
|
|
41
|
|
; GCN-LABEL: {{^}}func_regular_call:
; Same as kernel_call, but from a regular (non-kernel) function: the value
; live across the call still lands in v8 with no spill code, since IPRA knows
; @func clobbers only v0-v7.
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8

; GCN: ; NumSgprs: 32
; GCN: ; NumVgprs: 9
define void @func_regular_call() #1 {
  ; Volatile accesses keep %vgpr live across the call.
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  tail call void @func()
  store volatile i32 %vgpr, i32 addrspace(1)* undef
  ret void
}
|
|
63
|
|
; GCN-LABEL: {{^}}func_tail_call:
; A pure tail call: lowered as a PC-relative branch (s_getpc/s_add/s_addc
; followed by s_setpc_b64) rather than s_swappc_b64, and nothing is live
; across it, so NumVgprs stays at @func's own 8.
; GCN: s_waitcnt
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6,
; GCN-NEXT: s_addc_u32 s7,
; GCN-NEXT: s_setpc_b64 s[6:7]

; GCN: ; NumSgprs: 32
; GCN: ; NumVgprs: 8
define void @func_tail_call() #1 {
  tail call void @func()
  ret void
}
|
|
77
|
|
; GCN-LABEL: {{^}}func_call_tail_call:
; Mixed case: the first call cannot be a tail call (the volatile store follows
; it) and uses s_swappc_b64 with %vgpr kept in v8; the second call is the real
; tail call and ends in s_setpc_b64.
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8
; GCN: s_setpc_b64

; GCN: ; NumSgprs: 32
; GCN: ; NumVgprs: 9
define void @func_call_tail_call() #1 {
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  tail call void @func()
  store volatile i32 %vgpr, i32 addrspace(1)* undef
  tail call void @func()
  ret void
}
|
|
93
|
|
; Empty callee; marked noinline so the calls to it in @test_funcx2 survive
; into codegen.
define void @void_func_void() noinline {
  ret void
}
|
|
97
|
|
; Make sure we don't get save/restore of FP between calls.
; GCN-LABEL: {{^}}test_funcx2:
; GCN-NOT: s5
; GCN-NOT: s32
; Two back-to-back non-tail calls: no frame-pointer (or stack-pointer)
; register should be touched between them.
define void @test_funcx2() #0 {
  call void @void_func_void()
  call void @void_func_void()
  ret void
}
|
|
107
|
|
; #0: plain nounwind (kernels and callers).
; #1: nounwind + noinline, so the callee functions stay out of line and the
;     call-site codegen being tested is preserved.
attributes #0 = { nounwind }
attributes #1 = { nounwind noinline }
|