comparison llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll @ 207:2e18cbf3894f

LLVM12
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Tue, 08 Jun 2021 06:07:14 +0900
parents 0572611fdcc8
children 5f17cb93ff66
comparison
equal deleted inserted replaced
173:0572611fdcc8 207:2e18cbf3894f
1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s 1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,MUBUF %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s 2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-MUBUF,MUBUF %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -amdgpu-sroa=0 -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLATSCR %s
3 4
4 ; Test that non-entry function frame indices are expanded properly to 5 ; Test that non-entry function frame indices are expanded properly to
5 ; give an index relative to the scratch wave offset register 6 ; give an index relative to the scratch wave offset register
6 7
7 ; Materialize into a mov. Make sure there isn't an unnecessary copy. 8 ; Materialize into a mov. Make sure there isn't an unnecessary copy.
8 ; GCN-LABEL: {{^}}func_mov_fi_i32: 9 ; GCN-LABEL: {{^}}func_mov_fi_i32:
9 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10 11
11 ; CI-NEXT: v_lshr_b32_e64 v0, s32, 6 12 ; CI-NEXT: v_lshr_b32_e64 v0, s32, 6
12 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s32 13 ; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v0, 6, s32
13 14
14 ; GCN-NOT: v_mov 15 ; GFX9-FLATSCR: v_mov_b32_e32 v0, s32
16 ; GFX9-FLATSCR-NOT: v_lshrrev_b32_e64
17
18 ; MUBUF-NOT: v_mov
19
15 ; GCN: ds_write_b32 v0, v0 20 ; GCN: ds_write_b32 v0, v0
16 define void @func_mov_fi_i32() #0 { 21 define void @func_mov_fi_i32() #0 {
17 %alloca = alloca i32, addrspace(5) 22 %alloca = alloca i32, addrspace(5)
18 store volatile i32 addrspace(5)* %alloca, i32 addrspace(5)* addrspace(3)* undef 23 store volatile i32 addrspace(5)* %alloca, i32 addrspace(5)* addrspace(3)* undef
19 ret void 24 ret void
28 ; CI: ds_write_b32 v0, v0 33 ; CI: ds_write_b32 v0, v0
29 ; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s32, 6 34 ; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s32, 6
30 ; CI-NEXT: v_add_i32_e{{32|64}} v0, {{s\[[0-9]+:[0-9]+\]|vcc}}, 4, [[SCALED]] 35 ; CI-NEXT: v_add_i32_e{{32|64}} v0, {{s\[[0-9]+:[0-9]+\]|vcc}}, 4, [[SCALED]]
31 ; CI-NEXT: ds_write_b32 v0, v0 36 ; CI-NEXT: ds_write_b32 v0, v0
32 37
33 ; GFX9: v_lshrrev_b32_e64 v0, 6, s32 38 ; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v0, 6, s32
34 ; GFX9-NEXT: ds_write_b32 v0, v0 39 ; GFX9-FLATSCR: v_mov_b32_e32 v0, s32
35 ; GFX9-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32 40 ; GFX9-FLATSCR: s_add_u32 [[ADD:[^,]+]], s32, 4
36 ; GFX9-NEXT: v_add_u32_e32 v0, 4, [[SCALED]] 41 ; GFX9-NEXT: ds_write_b32 v0, v0
37 ; GFX9-NEXT: ds_write_b32 v0, v0 42 ; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32
43 ; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 4, [[SCALED]]
44 ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, [[ADD]]
45 ; GFX9-NEXT: ds_write_b32 v0, v0
38 define void @func_mov_fi_i32_offset() #0 { 46 define void @func_mov_fi_i32_offset() #0 {
39 %alloca0 = alloca i32, addrspace(5) 47 %alloca0 = alloca i32, addrspace(5)
40 %alloca1 = alloca i32, addrspace(5) 48 %alloca1 = alloca i32, addrspace(5)
41 store volatile i32 addrspace(5)* %alloca0, i32 addrspace(5)* addrspace(3)* undef 49 store volatile i32 addrspace(5)* %alloca0, i32 addrspace(5)* addrspace(3)* undef
42 store volatile i32 addrspace(5)* %alloca1, i32 addrspace(5)* addrspace(3)* undef 50 store volatile i32 addrspace(5)* %alloca1, i32 addrspace(5)* addrspace(3)* undef
50 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 58 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51 59
52 ; CI: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s32, 6 60 ; CI: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s32, 6
53 ; CI-NEXT: v_add_i32_e32 v0, vcc, 4, [[SCALED]] 61 ; CI-NEXT: v_add_i32_e32 v0, vcc, 4, [[SCALED]]
54 62
55 ; GFX9: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32 63 ; GFX9-MUBUF: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32
56 ; GFX9-NEXT: v_add_u32_e32 v0, 4, [[SCALED]] 64 ; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 4, [[SCALED]]
65
66 ; GFX9-FLATSCR: v_mov_b32_e32 [[ADD:v[0-9]+]], s32
67 ; GFX9-FLATSCR-NEXT: v_add_u32_e32 v0, 4, [[ADD]]
57 68
58 ; GCN-NOT: v_mov 69 ; GCN-NOT: v_mov
59 ; GCN: ds_write_b32 v0, v0 70 ; GCN: ds_write_b32 v0, v0
60 define void @func_add_constant_to_fi_i32() #0 { 71 define void @func_add_constant_to_fi_i32() #0 {
61 %alloca = alloca [2 x i32], align 4, addrspace(5) 72 %alloca = alloca [2 x i32], align 4, addrspace(5)
64 ret void 75 ret void
65 } 76 }
66 77
67 ; A user the materialized frame index can't be meaningfully folded 78 ; A user the materialized frame index can't be meaningfully folded
68 ; into. 79 ; into.
80 ; FIXME: Should use s_mul but the frame index always gets materialized into a
81 ; vgpr
69 82
70 ; GCN-LABEL: {{^}}func_other_fi_user_i32: 83 ; GCN-LABEL: {{^}}func_other_fi_user_i32:
71 84
72 ; CI: v_lshr_b32_e64 v0, s32, 6 85 ; CI: v_lshr_b32_e64 v0, s32, 6
73 86
74 ; GFX9: v_lshrrev_b32_e64 v0, 6, s32 87 ; GFX9-MUBUF: v_lshrrev_b32_e64 v0, 6, s32
75 88 ; GFX9-FLATSCR: v_mov_b32_e32 v0, s32
76 ; GCN-NEXT: v_mul_u32_u24_e32 v0, 9, v0 89
90 ; GCN-NEXT: v_mul_lo_u32 v0, v0, 9
77 ; GCN-NOT: v_mov 91 ; GCN-NOT: v_mov
78 ; GCN: ds_write_b32 v0, v0 92 ; GCN: ds_write_b32 v0, v0
79 define void @func_other_fi_user_i32() #0 { 93 define void @func_other_fi_user_i32() #0 {
80 %alloca = alloca [2 x i32], align 4, addrspace(5) 94 %alloca = alloca [2 x i32], align 4, addrspace(5)
81 %ptrtoint = ptrtoint [2 x i32] addrspace(5)* %alloca to i32 95 %ptrtoint = ptrtoint [2 x i32] addrspace(5)* %alloca to i32
84 ret void 98 ret void
85 } 99 }
86 100
87 ; GCN-LABEL: {{^}}func_store_private_arg_i32_ptr: 101 ; GCN-LABEL: {{^}}func_store_private_arg_i32_ptr:
88 ; GCN: v_mov_b32_e32 v1, 15{{$}} 102 ; GCN: v_mov_b32_e32 v1, 15{{$}}
89 ; GCN: buffer_store_dword v1, v0, s[0:3], 0 offen{{$}} 103 ; MUBUF: buffer_store_dword v1, v0, s[0:3], 0 offen{{$}}
104 ; GFX9-FLATSCR: scratch_store_dword v0, v1, off{{$}}
90 define void @func_store_private_arg_i32_ptr(i32 addrspace(5)* %ptr) #0 { 105 define void @func_store_private_arg_i32_ptr(i32 addrspace(5)* %ptr) #0 {
91 store volatile i32 15, i32 addrspace(5)* %ptr 106 store volatile i32 15, i32 addrspace(5)* %ptr
92 ret void 107 ret void
93 } 108 }
94 109
95 ; GCN-LABEL: {{^}}func_load_private_arg_i32_ptr: 110 ; GCN-LABEL: {{^}}func_load_private_arg_i32_ptr:
96 ; GCN: s_waitcnt 111 ; GCN: s_waitcnt
97 ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen{{$}} 112 ; MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen glc{{$}}
113 ; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v0, off glc{{$}}
98 define void @func_load_private_arg_i32_ptr(i32 addrspace(5)* %ptr) #0 { 114 define void @func_load_private_arg_i32_ptr(i32 addrspace(5)* %ptr) #0 {
99 %val = load volatile i32, i32 addrspace(5)* %ptr 115 %val = load volatile i32, i32 addrspace(5)* %ptr
100 ret void 116 ret void
101 } 117 }
102 118
104 ; GCN: s_waitcnt 120 ; GCN: s_waitcnt
105 121
106 ; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], s32, 6 122 ; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], s32, 6
107 ; CI-NEXT: v_or_b32_e32 v0, 4, [[SHIFT]] 123 ; CI-NEXT: v_or_b32_e32 v0, 4, [[SHIFT]]
108 124
109 ; GFX9: v_lshrrev_b32_e64 [[SHIFT:v[0-9]+]], 6, s32 125 ; GFX9-MUBUF: v_lshrrev_b32_e64 [[SHIFT:v[0-9]+]], 6, s32
110 ; GFX9-NEXT: v_or_b32_e32 v0, 4, [[SHIFT]] 126 ; GFX9-MUBUF-NEXT: v_or_b32_e32 v0, 4, [[SHIFT]]
127
128 ; GFX9-FLATSCR: v_mov_b32_e32 [[SP:v[0-9]+]], s32
129 ; GFX9-FLATSCR-NEXT: v_or_b32_e32 v0, 4, [[SP]]
111 130
112 ; GCN-NOT: v_mov 131 ; GCN-NOT: v_mov
113 ; GCN: ds_write_b32 v0, v0 132 ; GCN: ds_write_b32 v0, v0
114 define void @void_func_byval_struct_i8_i32_ptr({ i8, i32 } addrspace(5)* byval %arg0) #0 { 133 define void @void_func_byval_struct_i8_i32_ptr({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0) #0 {
115 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 134 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
116 %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1 135 %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
117 %load1 = load i32, i32 addrspace(5)* %gep1 136 %load1 = load i32, i32 addrspace(5)* %gep1
118 store volatile i32 addrspace(5)* %gep1, i32 addrspace(5)* addrspace(3)* undef 137 store volatile i32 addrspace(5)* %gep1, i32 addrspace(5)* addrspace(3)* undef
119 ret void 138 ret void
120 } 139 }
121 140
122 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_value: 141 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_value:
123 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 142 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124 ; GCN-NEXT: buffer_load_ubyte v0, off, s[0:3], s32 143 ; MUBUF-NEXT: buffer_load_ubyte v0, off, s[0:3], s32
125 ; GCN_NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 144 ; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4
126 define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 } addrspace(5)* byval %arg0) #0 { 145 ; GFX9-FLATSCR-NEXT: scratch_load_ubyte v0, off, s32
146 ; GFX9-FLATSCR-NEXT: scratch_load_dword v1, off, s32 offset:4
147 define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0) #0 {
127 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 148 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
128 %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1 149 %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
129 %load0 = load i8, i8 addrspace(5)* %gep0 150 %load0 = load i8, i8 addrspace(5)* %gep0
130 %load1 = load i32, i32 addrspace(5)* %gep1 151 %load1 = load i32, i32 addrspace(5)* %gep1
131 store volatile i8 %load0, i8 addrspace(3)* undef 152 store volatile i8 %load0, i8 addrspace(3)* undef
135 156
136 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_nonentry_block: 157 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_nonentry_block:
137 158
138 ; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], s32, 6 159 ; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], s32, 6
139 160
140 ; GFX9: v_lshrrev_b32_e64 [[SHIFT:v[0-9]+]], 6, s32 161 ; GFX9-MUBUF: v_lshrrev_b32_e64 [[SP:v[0-9]+]], 6, s32
162 ; GFX9-FLATSCR: v_mov_b32_e32 [[SP:v[0-9]+]], s32
141 163
142 ; GCN: s_and_saveexec_b64 164 ; GCN: s_and_saveexec_b64
143 165
144 ; CI: v_add_i32_e32 [[GEP:v[0-9]+]], vcc, 4, [[SHIFT]] 166 ; CI: v_add_i32_e32 [[GEP:v[0-9]+]], vcc, 4, [[SHIFT]]
145 ; CI: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4{{$}} 167 ; CI: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4 glc{{$}}
146 168
147 ; GFX9: v_add_u32_e32 [[GEP:v[0-9]+]], 4, [[SHIFT]] 169 ; GFX9: v_add_u32_e32 [[GEP:v[0-9]+]], 4, [[SP]]
148 ; GFX9: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4{{$}} 170 ; GFX9-MUBUF: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4 glc{{$}}
171 ; GFX9-FLATSCR: scratch_load_dword v{{[0-9]+}}, off, s32 offset:4 glc{{$}}
149 172
150 ; GCN: ds_write_b32 v{{[0-9]+}}, [[GEP]] 173 ; GCN: ds_write_b32 v{{[0-9]+}}, [[GEP]]
151 define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 } addrspace(5)* byval %arg0, i32 %arg2) #0 { 174 define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0, i32 %arg2) #0 {
152 %cmp = icmp eq i32 %arg2, 0 175 %cmp = icmp eq i32 %arg2, 0
153 br i1 %cmp, label %bb, label %ret 176 br i1 %cmp, label %bb, label %ret
154 177
155 bb: 178 bb:
156 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 179 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
168 191
169 ; CI-DAG: s_movk_i32 [[K:s[0-9]+|vcc_lo|vcc_hi]], 0x200 192 ; CI-DAG: s_movk_i32 [[K:s[0-9]+|vcc_lo|vcc_hi]], 0x200
170 ; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s32, 6 193 ; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s32, 6
171 ; CI: v_add_i32_e32 [[VZ:v[0-9]+]], vcc, [[K]], [[SCALED]] 194 ; CI: v_add_i32_e32 [[VZ:v[0-9]+]], vcc, [[K]], [[SCALED]]
172 195
173 ; GFX9-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32 196 ; GFX9-MUBUF-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32
174 ; GFX9: v_add_u32_e32 [[VZ:v[0-9]+]], 0x200, [[SCALED]] 197 ; GFX9-MUBUF: v_add_u32_e32 [[VZ:v[0-9]+]], 0x200, [[SCALED]]
175 198
176 ; GCN: v_mul_u32_u24_e32 [[VZ]], 9, [[VZ]] 199 ; GFX9-FLATSCR-DAG: s_add_u32 [[SZ:[^,]+]], s32, 0x200
200 ; GFX9-FLATSCR: v_mov_b32_e32 [[VZ:v[0-9]+]], [[SZ]]
201
202 ; GCN: v_mul_lo_u32 [[VZ]], [[VZ]], 9
177 ; GCN: ds_write_b32 v0, [[VZ]] 203 ; GCN: ds_write_b32 v0, [[VZ]]
178 define void @func_other_fi_user_non_inline_imm_offset_i32() #0 { 204 define void @func_other_fi_user_non_inline_imm_offset_i32() #0 {
179 %alloca0 = alloca [128 x i32], align 4, addrspace(5) 205 %alloca0 = alloca [128 x i32], align 4, addrspace(5)
180 %alloca1 = alloca [8 x i32], align 4, addrspace(5) 206 %alloca1 = alloca [8 x i32], align 4, addrspace(5)
181 %gep0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca0, i32 0, i32 65 207 %gep0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca0, i32 0, i32 65
191 217
192 ; CI-DAG: s_movk_i32 [[OFFSET:s[0-9]+]], 0x200 218 ; CI-DAG: s_movk_i32 [[OFFSET:s[0-9]+]], 0x200
193 ; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s32, 6 219 ; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s32, 6
194 ; CI: v_add_i32_e64 [[VZ:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, [[OFFSET]], [[SCALED]] 220 ; CI: v_add_i32_e64 [[VZ:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, [[OFFSET]], [[SCALED]]
195 221
196 ; GFX9-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32 222 ; GFX9-MUBUF-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32
197 ; GFX9: v_add_u32_e32 [[VZ:v[0-9]+]], 0x200, [[SCALED]] 223 ; GFX9-MUBUF: v_add_u32_e32 [[VZ:v[0-9]+]], 0x200, [[SCALED]]
198 224
199 ; GCN: v_mul_u32_u24_e32 [[VZ]], 9, [[VZ]] 225 ; GFX9-FLATSCR-DAG: s_add_u32 [[SZ:[^,]+]], s32, 0x200
226 ; GFX9-FLATSCR: v_mov_b32_e32 [[VZ:v[0-9]+]], [[SZ]]
227
228 ; GCN: v_mul_lo_u32 [[VZ]], [[VZ]], 9
200 ; GCN: ds_write_b32 v0, [[VZ]] 229 ; GCN: ds_write_b32 v0, [[VZ]]
201 define void @func_other_fi_user_non_inline_imm_offset_i32_vcc_live() #0 { 230 define void @func_other_fi_user_non_inline_imm_offset_i32_vcc_live() #0 {
202 %alloca0 = alloca [128 x i32], align 4, addrspace(5) 231 %alloca0 = alloca [128 x i32], align 4, addrspace(5)
203 %alloca1 = alloca [8 x i32], align 4, addrspace(5) 232 %alloca1 = alloca [8 x i32], align 4, addrspace(5)
204 %vcc = call i64 asm sideeffect "; def $0", "={vcc}"() 233 %vcc = call i64 asm sideeffect "; def $0", "={vcc}"()
217 ; undef flag not preserved in eliminateFrameIndex when handling the 246 ; undef flag not preserved in eliminateFrameIndex when handling the
218 ; stores in the middle block. 247 ; stores in the middle block.
219 248
220 ; GCN-LABEL: {{^}}undefined_stack_store_reg: 249 ; GCN-LABEL: {{^}}undefined_stack_store_reg:
221 ; GCN: s_and_saveexec_b64 250 ; GCN: s_and_saveexec_b64
222 ; GCN: buffer_store_dword v0, off, s[0:3], s33 offset: 251 ; MUBUF: buffer_store_dword v0, off, s[0:3], s33 offset:
223 ; GCN: buffer_store_dword v0, off, s[0:3], s33 offset: 252 ; MUBUF: buffer_store_dword v0, off, s[0:3], s33 offset:
224 ; GCN: buffer_store_dword v0, off, s[0:3], s33 offset: 253 ; MUBUF: buffer_store_dword v0, off, s[0:3], s33 offset:
225 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset: 254 ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:
255 ; FLATSCR: scratch_store_dword v0, off, s33 offset:
256 ; FLATSCR: scratch_store_dword v0, off, s33 offset:
257 ; FLATSCR: scratch_store_dword v0, off, s33 offset:
258 ; FLATSCR: scratch_store_dword v{{[0-9]+}}, off, s33 offset:
226 define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 { 259 define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 {
227 bb: 260 bb:
228 %tmp = alloca <4 x float>, align 16, addrspace(5) 261 %tmp = alloca <4 x float>, align 16, addrspace(5)
229 %tmp2 = insertelement <4 x float> undef, float %arg, i32 0 262 %tmp2 = insertelement <4 x float> undef, float %arg, i32 0
230 store <4 x float> %tmp2, <4 x float> addrspace(5)* undef 263 store <4 x float> %tmp2, <4 x float> addrspace(5)* undef
241 ret void 274 ret void
242 } 275 }
243 276
244 ; GCN-LABEL: {{^}}alloca_ptr_nonentry_block: 277 ; GCN-LABEL: {{^}}alloca_ptr_nonentry_block:
245 ; GCN: s_and_saveexec_b64 278 ; GCN: s_and_saveexec_b64
246 ; GCN: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4 279 ; MUBUF: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4
280 ; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, s32 offset:4
247 281
248 ; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], s32, 6 282 ; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], s32, 6
249 ; CI-NEXT: v_or_b32_e32 [[PTR:v[0-9]+]], 4, [[SHIFT]] 283 ; CI-NEXT: v_or_b32_e32 [[PTR:v[0-9]+]], 4, [[SHIFT]]
250 284
251 ; GFX9: v_lshrrev_b32_e64 [[SHIFT:v[0-9]+]], 6, s32 285 ; GFX9-MUBUF: v_lshrrev_b32_e64 [[SHIFT:v[0-9]+]], 6, s32
252 ; GFX9-NEXT: v_or_b32_e32 [[PTR:v[0-9]+]], 4, [[SHIFT]] 286 ; GFX9-MUBUF-NEXT: v_or_b32_e32 [[PTR:v[0-9]+]], 4, [[SHIFT]]
287
288 ; GFX9-FLATSCR: v_mov_b32_e32 [[SP:v[0-9]+]], s32
289 ; GFX9-FLATSCR-NEXT: v_or_b32_e32 [[PTR:v[0-9]+]], 4, [[SP]]
253 290
254 ; GCN: ds_write_b32 v{{[0-9]+}}, [[PTR]] 291 ; GCN: ds_write_b32 v{{[0-9]+}}, [[PTR]]
255 define void @alloca_ptr_nonentry_block(i32 %arg0) #0 { 292 define void @alloca_ptr_nonentry_block(i32 %arg0) #0 {
256 %alloca0 = alloca { i8, i32 }, align 4, addrspace(5) 293 %alloca0 = alloca { i8, i32 }, align 4, addrspace(5)
257 %cmp = icmp eq i32 %arg0, 0 294 %cmp = icmp eq i32 %arg0, 0