; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VGPR %s
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SMEM %s
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VMEM %s

; ALL-LABEL: {{^}}spill_sgpr_x2:
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:11], m0 ; 8-byte Folded Spill
; SMEM: s_cbranch_scc1

; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:11], m0 ; 8-byte Folded Reload

; SMEM: s_dcache_wb
; SMEM: s_endpgm

; FIXME: Should only need 4 bytes
; SMEM: ScratchSize: 12


; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1

; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x2(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; ALL-LABEL: {{^}}spill_sgpr_x4:
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[12:15], m0 ; 16-byte Folded Spill
; SMEM: s_cbranch_scc1

; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[12:15], m0 ; 16-byte Folded Reload
; SMEM: s_dcache_wb
; SMEM: s_endpgm

; FIXME: Should only need 4 bytes
; SMEM: ScratchSize: 20

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3


; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x4(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<4 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; ALL-LABEL: {{^}}spill_sgpr_x8:

; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Spill
; SMEM: s_add_u32 m0, s3, 0x110{{$}}
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Spill
; SMEM: s_cbranch_scc1

; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Reload
; SMEM: s_add_u32 m0, s3, 0x110{{$}}
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Reload

; SMEM: s_dcache_wb
; SMEM: s_endpgm

; SMEM: ScratchSize: 36

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7

; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x8(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; FIXME: x16 inlineasm seems broken
; define amdgpu_kernel void @spill_sgpr_x16(i32 addrspace(1)* %out, i32 %in) #0 {
;   %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
;   %cmp = icmp eq i32 %in, 0
;   br i1 %cmp, label %bb0, label %ret

; bb0:
;   call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0
;   br label %ret

; ret:
;   ret void
; }

attributes #0 = { nounwind }