Mercurial > hg > CbC > CbC_llvm
comparison llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll @ 207:2e18cbf3894f
LLVM12
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 08 Jun 2021 06:07:14 +0900 |
parents | 0572611fdcc8 |
children | 5f17cb93ff66 |
comparison
equal
deleted
inserted
replaced
173:0572611fdcc8 | 207:2e18cbf3894f |
---|---|
1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s | 1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,MUBUF %s |
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s | 2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-MUBUF,MUBUF %s |
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -amdgpu-sroa=0 -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLATSCR %s | |
3 | 4 |
4 ; Test that non-entry function frame indices are expanded properly to | 5 ; Test that non-entry function frame indices are expanded properly to |
5 ; give an index relative to the scratch wave offset register | 6 ; give an index relative to the scratch wave offset register |
6 | 7 |
7 ; Materialize into a mov. Make sure there isn't an unnecessary copy. | 8 ; Materialize into a mov. Make sure there isn't an unnecessary copy. |
8 ; GCN-LABEL: {{^}}func_mov_fi_i32: | 9 ; GCN-LABEL: {{^}}func_mov_fi_i32: |
9 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | 10 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
10 | 11 |
11 ; CI-NEXT: v_lshr_b32_e64 v0, s32, 6 | 12 ; CI-NEXT: v_lshr_b32_e64 v0, s32, 6 |
12 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s32 | 13 ; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v0, 6, s32 |
13 | 14 |
14 ; GCN-NOT: v_mov | 15 ; GFX9-FLATSCR: v_mov_b32_e32 v0, s32 |
16 ; GFX9-FLATSCR-NOT: v_lshrrev_b32_e64 | |
17 | |
18 ; MUBUF-NOT: v_mov | |
19 | |
15 ; GCN: ds_write_b32 v0, v0 | 20 ; GCN: ds_write_b32 v0, v0 |
16 define void @func_mov_fi_i32() #0 { | 21 define void @func_mov_fi_i32() #0 { |
17 %alloca = alloca i32, addrspace(5) | 22 %alloca = alloca i32, addrspace(5) |
18 store volatile i32 addrspace(5)* %alloca, i32 addrspace(5)* addrspace(3)* undef | 23 store volatile i32 addrspace(5)* %alloca, i32 addrspace(5)* addrspace(3)* undef |
19 ret void | 24 ret void |
28 ; CI: ds_write_b32 v0, v0 | 33 ; CI: ds_write_b32 v0, v0 |
29 ; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s32, 6 | 34 ; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s32, 6 |
30 ; CI-NEXT: v_add_i32_e{{32|64}} v0, {{s\[[0-9]+:[0-9]+\]|vcc}}, 4, [[SCALED]] | 35 ; CI-NEXT: v_add_i32_e{{32|64}} v0, {{s\[[0-9]+:[0-9]+\]|vcc}}, 4, [[SCALED]] |
31 ; CI-NEXT: ds_write_b32 v0, v0 | 36 ; CI-NEXT: ds_write_b32 v0, v0 |
32 | 37 |
33 ; GFX9: v_lshrrev_b32_e64 v0, 6, s32 | 38 ; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v0, 6, s32 |
34 ; GFX9-NEXT: ds_write_b32 v0, v0 | 39 ; GFX9-FLATSCR: v_mov_b32_e32 v0, s32 |
35 ; GFX9-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32 | 40 ; GFX9-FLATSCR: s_add_u32 [[ADD:[^,]+]], s32, 4 |
36 ; GFX9-NEXT: v_add_u32_e32 v0, 4, [[SCALED]] | 41 ; GFX9-NEXT: ds_write_b32 v0, v0 |
37 ; GFX9-NEXT: ds_write_b32 v0, v0 | 42 ; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32 |
43 ; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 4, [[SCALED]] | |
44 ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, [[ADD]] | |
45 ; GFX9-NEXT: ds_write_b32 v0, v0 | |
38 define void @func_mov_fi_i32_offset() #0 { | 46 define void @func_mov_fi_i32_offset() #0 { |
39 %alloca0 = alloca i32, addrspace(5) | 47 %alloca0 = alloca i32, addrspace(5) |
40 %alloca1 = alloca i32, addrspace(5) | 48 %alloca1 = alloca i32, addrspace(5) |
41 store volatile i32 addrspace(5)* %alloca0, i32 addrspace(5)* addrspace(3)* undef | 49 store volatile i32 addrspace(5)* %alloca0, i32 addrspace(5)* addrspace(3)* undef |
42 store volatile i32 addrspace(5)* %alloca1, i32 addrspace(5)* addrspace(3)* undef | 50 store volatile i32 addrspace(5)* %alloca1, i32 addrspace(5)* addrspace(3)* undef |
50 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | 58 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
51 | 59 |
52 ; CI: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s32, 6 | 60 ; CI: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s32, 6 |
53 ; CI-NEXT: v_add_i32_e32 v0, vcc, 4, [[SCALED]] | 61 ; CI-NEXT: v_add_i32_e32 v0, vcc, 4, [[SCALED]] |
54 | 62 |
55 ; GFX9: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32 | 63 ; GFX9-MUBUF: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32 |
56 ; GFX9-NEXT: v_add_u32_e32 v0, 4, [[SCALED]] | 64 ; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 4, [[SCALED]] |
65 | |
66 ; GFX9-FLATSCR: v_mov_b32_e32 [[ADD:v[0-9]+]], s32 | |
67 ; GFX9-FLATSCR-NEXT: v_add_u32_e32 v0, 4, [[ADD]] | |
57 | 68 |
58 ; GCN-NOT: v_mov | 69 ; GCN-NOT: v_mov |
59 ; GCN: ds_write_b32 v0, v0 | 70 ; GCN: ds_write_b32 v0, v0 |
60 define void @func_add_constant_to_fi_i32() #0 { | 71 define void @func_add_constant_to_fi_i32() #0 { |
61 %alloca = alloca [2 x i32], align 4, addrspace(5) | 72 %alloca = alloca [2 x i32], align 4, addrspace(5) |
64 ret void | 75 ret void |
65 } | 76 } |
66 | 77 |
67 ; A user the materialized frame index can't be meaningfully folded | 78 ; A user the materialized frame index can't be meaningfully folded |
68 ; into. | 79 ; into. |
80 ; FIXME: Should use s_mul but the frame index always gets materialized into a | |
81 ; vgpr | |
69 | 82 |
70 ; GCN-LABEL: {{^}}func_other_fi_user_i32: | 83 ; GCN-LABEL: {{^}}func_other_fi_user_i32: |
71 | 84 |
72 ; CI: v_lshr_b32_e64 v0, s32, 6 | 85 ; CI: v_lshr_b32_e64 v0, s32, 6 |
73 | 86 |
74 ; GFX9: v_lshrrev_b32_e64 v0, 6, s32 | 87 ; GFX9-MUBUF: v_lshrrev_b32_e64 v0, 6, s32 |
75 | 88 ; GFX9-FLATSCR: v_mov_b32_e32 v0, s32 |
76 ; GCN-NEXT: v_mul_u32_u24_e32 v0, 9, v0 | 89 |
90 ; GCN-NEXT: v_mul_lo_u32 v0, v0, 9 | |
77 ; GCN-NOT: v_mov | 91 ; GCN-NOT: v_mov |
78 ; GCN: ds_write_b32 v0, v0 | 92 ; GCN: ds_write_b32 v0, v0 |
79 define void @func_other_fi_user_i32() #0 { | 93 define void @func_other_fi_user_i32() #0 { |
80 %alloca = alloca [2 x i32], align 4, addrspace(5) | 94 %alloca = alloca [2 x i32], align 4, addrspace(5) |
81 %ptrtoint = ptrtoint [2 x i32] addrspace(5)* %alloca to i32 | 95 %ptrtoint = ptrtoint [2 x i32] addrspace(5)* %alloca to i32 |
84 ret void | 98 ret void |
85 } | 99 } |
86 | 100 |
87 ; GCN-LABEL: {{^}}func_store_private_arg_i32_ptr: | 101 ; GCN-LABEL: {{^}}func_store_private_arg_i32_ptr: |
88 ; GCN: v_mov_b32_e32 v1, 15{{$}} | 102 ; GCN: v_mov_b32_e32 v1, 15{{$}} |
89 ; GCN: buffer_store_dword v1, v0, s[0:3], 0 offen{{$}} | 103 ; MUBUF: buffer_store_dword v1, v0, s[0:3], 0 offen{{$}} |
104 ; GFX9-FLATSCR: scratch_store_dword v0, v1, off{{$}} | |
90 define void @func_store_private_arg_i32_ptr(i32 addrspace(5)* %ptr) #0 { | 105 define void @func_store_private_arg_i32_ptr(i32 addrspace(5)* %ptr) #0 { |
91 store volatile i32 15, i32 addrspace(5)* %ptr | 106 store volatile i32 15, i32 addrspace(5)* %ptr |
92 ret void | 107 ret void |
93 } | 108 } |
94 | 109 |
95 ; GCN-LABEL: {{^}}func_load_private_arg_i32_ptr: | 110 ; GCN-LABEL: {{^}}func_load_private_arg_i32_ptr: |
96 ; GCN: s_waitcnt | 111 ; GCN: s_waitcnt |
97 ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen{{$}} | 112 ; MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen glc{{$}} |
113 ; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v0, off glc{{$}} | |
98 define void @func_load_private_arg_i32_ptr(i32 addrspace(5)* %ptr) #0 { | 114 define void @func_load_private_arg_i32_ptr(i32 addrspace(5)* %ptr) #0 { |
99 %val = load volatile i32, i32 addrspace(5)* %ptr | 115 %val = load volatile i32, i32 addrspace(5)* %ptr |
100 ret void | 116 ret void |
101 } | 117 } |
102 | 118 |
104 ; GCN: s_waitcnt | 120 ; GCN: s_waitcnt |
105 | 121 |
106 ; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], s32, 6 | 122 ; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], s32, 6 |
107 ; CI-NEXT: v_or_b32_e32 v0, 4, [[SHIFT]] | 123 ; CI-NEXT: v_or_b32_e32 v0, 4, [[SHIFT]] |
108 | 124 |
109 ; GFX9: v_lshrrev_b32_e64 [[SHIFT:v[0-9]+]], 6, s32 | 125 ; GFX9-MUBUF: v_lshrrev_b32_e64 [[SHIFT:v[0-9]+]], 6, s32 |
110 ; GFX9-NEXT: v_or_b32_e32 v0, 4, [[SHIFT]] | 126 ; GFX9-MUBUF-NEXT: v_or_b32_e32 v0, 4, [[SHIFT]] |
127 | |
128 ; GFX9-FLATSCR: v_mov_b32_e32 [[SP:v[0-9]+]], s32 | |
129 ; GFX9-FLATSCR-NEXT: v_or_b32_e32 v0, 4, [[SP]] | |
111 | 130 |
112 ; GCN-NOT: v_mov | 131 ; GCN-NOT: v_mov |
113 ; GCN: ds_write_b32 v0, v0 | 132 ; GCN: ds_write_b32 v0, v0 |
114 define void @void_func_byval_struct_i8_i32_ptr({ i8, i32 } addrspace(5)* byval %arg0) #0 { | 133 define void @void_func_byval_struct_i8_i32_ptr({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0) #0 { |
115 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 | 134 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 |
116 %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1 | 135 %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1 |
117 %load1 = load i32, i32 addrspace(5)* %gep1 | 136 %load1 = load i32, i32 addrspace(5)* %gep1 |
118 store volatile i32 addrspace(5)* %gep1, i32 addrspace(5)* addrspace(3)* undef | 137 store volatile i32 addrspace(5)* %gep1, i32 addrspace(5)* addrspace(3)* undef |
119 ret void | 138 ret void |
120 } | 139 } |
121 | 140 |
122 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_value: | 141 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_value: |
123 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | 142 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
124 ; GCN-NEXT: buffer_load_ubyte v0, off, s[0:3], s32 | 143 ; MUBUF-NEXT: buffer_load_ubyte v0, off, s[0:3], s32 |
125 ; GCN_NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 | 144 ; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 |
126 define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 } addrspace(5)* byval %arg0) #0 { | 145 ; GFX9-FLATSCR-NEXT: scratch_load_ubyte v0, off, s32 |
146 ; GFX9-FLATSCR-NEXT: scratch_load_dword v1, off, s32 offset:4 | |
147 define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0) #0 { | |
127 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 | 148 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 |
128 %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1 | 149 %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1 |
129 %load0 = load i8, i8 addrspace(5)* %gep0 | 150 %load0 = load i8, i8 addrspace(5)* %gep0 |
130 %load1 = load i32, i32 addrspace(5)* %gep1 | 151 %load1 = load i32, i32 addrspace(5)* %gep1 |
131 store volatile i8 %load0, i8 addrspace(3)* undef | 152 store volatile i8 %load0, i8 addrspace(3)* undef |
135 | 156 |
136 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_nonentry_block: | 157 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_nonentry_block: |
137 | 158 |
138 ; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], s32, 6 | 159 ; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], s32, 6 |
139 | 160 |
140 ; GFX9: v_lshrrev_b32_e64 [[SHIFT:v[0-9]+]], 6, s32 | 161 ; GFX9-MUBUF: v_lshrrev_b32_e64 [[SP:v[0-9]+]], 6, s32 |
162 ; GFX9-FLATSCR: v_mov_b32_e32 [[SP:v[0-9]+]], s32 | |
141 | 163 |
142 ; GCN: s_and_saveexec_b64 | 164 ; GCN: s_and_saveexec_b64 |
143 | 165 |
144 ; CI: v_add_i32_e32 [[GEP:v[0-9]+]], vcc, 4, [[SHIFT]] | 166 ; CI: v_add_i32_e32 [[GEP:v[0-9]+]], vcc, 4, [[SHIFT]] |
145 ; CI: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4{{$}} | 167 ; CI: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4 glc{{$}} |
146 | 168 |
147 ; GFX9: v_add_u32_e32 [[GEP:v[0-9]+]], 4, [[SHIFT]] | 169 ; GFX9: v_add_u32_e32 [[GEP:v[0-9]+]], 4, [[SP]] |
148 ; GFX9: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4{{$}} | 170 ; GFX9-MUBUF: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4 glc{{$}} |
171 ; GFX9-FLATSCR: scratch_load_dword v{{[0-9]+}}, off, s32 offset:4 glc{{$}} | |
149 | 172 |
150 ; GCN: ds_write_b32 v{{[0-9]+}}, [[GEP]] | 173 ; GCN: ds_write_b32 v{{[0-9]+}}, [[GEP]] |
151 define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 } addrspace(5)* byval %arg0, i32 %arg2) #0 { | 174 define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0, i32 %arg2) #0 { |
152 %cmp = icmp eq i32 %arg2, 0 | 175 %cmp = icmp eq i32 %arg2, 0 |
153 br i1 %cmp, label %bb, label %ret | 176 br i1 %cmp, label %bb, label %ret |
154 | 177 |
155 bb: | 178 bb: |
156 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 | 179 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 |
168 | 191 |
169 ; CI-DAG: s_movk_i32 [[K:s[0-9]+|vcc_lo|vcc_hi]], 0x200 | 192 ; CI-DAG: s_movk_i32 [[K:s[0-9]+|vcc_lo|vcc_hi]], 0x200 |
170 ; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s32, 6 | 193 ; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s32, 6 |
171 ; CI: v_add_i32_e32 [[VZ:v[0-9]+]], vcc, [[K]], [[SCALED]] | 194 ; CI: v_add_i32_e32 [[VZ:v[0-9]+]], vcc, [[K]], [[SCALED]] |
172 | 195 |
173 ; GFX9-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32 | 196 ; GFX9-MUBUF-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32 |
174 ; GFX9: v_add_u32_e32 [[VZ:v[0-9]+]], 0x200, [[SCALED]] | 197 ; GFX9-MUBUF: v_add_u32_e32 [[VZ:v[0-9]+]], 0x200, [[SCALED]] |
175 | 198 |
176 ; GCN: v_mul_u32_u24_e32 [[VZ]], 9, [[VZ]] | 199 ; GFX9-FLATSCR-DAG: s_add_u32 [[SZ:[^,]+]], s32, 0x200 |
200 ; GFX9-FLATSCR: v_mov_b32_e32 [[VZ:v[0-9]+]], [[SZ]] | |
201 | |
202 ; GCN: v_mul_lo_u32 [[VZ]], [[VZ]], 9 | |
177 ; GCN: ds_write_b32 v0, [[VZ]] | 203 ; GCN: ds_write_b32 v0, [[VZ]] |
178 define void @func_other_fi_user_non_inline_imm_offset_i32() #0 { | 204 define void @func_other_fi_user_non_inline_imm_offset_i32() #0 { |
179 %alloca0 = alloca [128 x i32], align 4, addrspace(5) | 205 %alloca0 = alloca [128 x i32], align 4, addrspace(5) |
180 %alloca1 = alloca [8 x i32], align 4, addrspace(5) | 206 %alloca1 = alloca [8 x i32], align 4, addrspace(5) |
181 %gep0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca0, i32 0, i32 65 | 207 %gep0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca0, i32 0, i32 65 |
191 | 217 |
192 ; CI-DAG: s_movk_i32 [[OFFSET:s[0-9]+]], 0x200 | 218 ; CI-DAG: s_movk_i32 [[OFFSET:s[0-9]+]], 0x200 |
193 ; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s32, 6 | 219 ; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s32, 6 |
194 ; CI: v_add_i32_e64 [[VZ:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, [[OFFSET]], [[SCALED]] | 220 ; CI: v_add_i32_e64 [[VZ:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, [[OFFSET]], [[SCALED]] |
195 | 221 |
196 ; GFX9-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32 | 222 ; GFX9-MUBUF-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32 |
197 ; GFX9: v_add_u32_e32 [[VZ:v[0-9]+]], 0x200, [[SCALED]] | 223 ; GFX9-MUBUF: v_add_u32_e32 [[VZ:v[0-9]+]], 0x200, [[SCALED]] |
198 | 224 |
199 ; GCN: v_mul_u32_u24_e32 [[VZ]], 9, [[VZ]] | 225 ; GFX9-FLATSCR-DAG: s_add_u32 [[SZ:[^,]+]], s32, 0x200 |
226 ; GFX9-FLATSCR: v_mov_b32_e32 [[VZ:v[0-9]+]], [[SZ]] | |
227 | |
228 ; GCN: v_mul_lo_u32 [[VZ]], [[VZ]], 9 | |
200 ; GCN: ds_write_b32 v0, [[VZ]] | 229 ; GCN: ds_write_b32 v0, [[VZ]] |
201 define void @func_other_fi_user_non_inline_imm_offset_i32_vcc_live() #0 { | 230 define void @func_other_fi_user_non_inline_imm_offset_i32_vcc_live() #0 { |
202 %alloca0 = alloca [128 x i32], align 4, addrspace(5) | 231 %alloca0 = alloca [128 x i32], align 4, addrspace(5) |
203 %alloca1 = alloca [8 x i32], align 4, addrspace(5) | 232 %alloca1 = alloca [8 x i32], align 4, addrspace(5) |
204 %vcc = call i64 asm sideeffect "; def $0", "={vcc}"() | 233 %vcc = call i64 asm sideeffect "; def $0", "={vcc}"() |
217 ; undef flag not preserved in eliminateFrameIndex when handling the | 246 ; undef flag not preserved in eliminateFrameIndex when handling the |
218 ; stores in the middle block. | 247 ; stores in the middle block. |
219 | 248 |
220 ; GCN-LABEL: {{^}}undefined_stack_store_reg: | 249 ; GCN-LABEL: {{^}}undefined_stack_store_reg: |
221 ; GCN: s_and_saveexec_b64 | 250 ; GCN: s_and_saveexec_b64 |
222 ; GCN: buffer_store_dword v0, off, s[0:3], s33 offset: | 251 ; MUBUF: buffer_store_dword v0, off, s[0:3], s33 offset: |
223 ; GCN: buffer_store_dword v0, off, s[0:3], s33 offset: | 252 ; MUBUF: buffer_store_dword v0, off, s[0:3], s33 offset: |
224 ; GCN: buffer_store_dword v0, off, s[0:3], s33 offset: | 253 ; MUBUF: buffer_store_dword v0, off, s[0:3], s33 offset: |
225 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset: | 254 ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset: |
255 ; FLATSCR: scratch_store_dword v0, off, s33 offset: | |
256 ; FLATSCR: scratch_store_dword v0, off, s33 offset: | |
257 ; FLATSCR: scratch_store_dword v0, off, s33 offset: | |
258 ; FLATSCR: scratch_store_dword v{{[0-9]+}}, off, s33 offset: | |
226 define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 { | 259 define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 { |
227 bb: | 260 bb: |
228 %tmp = alloca <4 x float>, align 16, addrspace(5) | 261 %tmp = alloca <4 x float>, align 16, addrspace(5) |
229 %tmp2 = insertelement <4 x float> undef, float %arg, i32 0 | 262 %tmp2 = insertelement <4 x float> undef, float %arg, i32 0 |
230 store <4 x float> %tmp2, <4 x float> addrspace(5)* undef | 263 store <4 x float> %tmp2, <4 x float> addrspace(5)* undef |
241 ret void | 274 ret void |
242 } | 275 } |
243 | 276 |
244 ; GCN-LABEL: {{^}}alloca_ptr_nonentry_block: | 277 ; GCN-LABEL: {{^}}alloca_ptr_nonentry_block: |
245 ; GCN: s_and_saveexec_b64 | 278 ; GCN: s_and_saveexec_b64 |
246 ; GCN: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4 | 279 ; MUBUF: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4 |
280 ; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, s32 offset:4 | |
247 | 281 |
248 ; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], s32, 6 | 282 ; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], s32, 6 |
249 ; CI-NEXT: v_or_b32_e32 [[PTR:v[0-9]+]], 4, [[SHIFT]] | 283 ; CI-NEXT: v_or_b32_e32 [[PTR:v[0-9]+]], 4, [[SHIFT]] |
250 | 284 |
251 ; GFX9: v_lshrrev_b32_e64 [[SHIFT:v[0-9]+]], 6, s32 | 285 ; GFX9-MUBUF: v_lshrrev_b32_e64 [[SHIFT:v[0-9]+]], 6, s32 |
252 ; GFX9-NEXT: v_or_b32_e32 [[PTR:v[0-9]+]], 4, [[SHIFT]] | 286 ; GFX9-MUBUF-NEXT: v_or_b32_e32 [[PTR:v[0-9]+]], 4, [[SHIFT]] |
287 | |
288 ; GFX9-FLATSCR: v_mov_b32_e32 [[SP:v[0-9]+]], s32 | |
289 ; GFX9-FLATSCR-NEXT: v_or_b32_e32 [[PTR:v[0-9]+]], 4, [[SP]] | |
253 | 290 |
254 ; GCN: ds_write_b32 v{{[0-9]+}}, [[PTR]] | 291 ; GCN: ds_write_b32 v{{[0-9]+}}, [[PTR]] |
255 define void @alloca_ptr_nonentry_block(i32 %arg0) #0 { | 292 define void @alloca_ptr_nonentry_block(i32 %arg0) #0 { |
256 %alloca0 = alloca { i8, i32 }, align 4, addrspace(5) | 293 %alloca0 = alloca { i8, i32 }, align 4, addrspace(5) |
257 %cmp = icmp eq i32 %arg0, 0 | 294 %cmp = icmp eq i32 %arg0, 0 |