150
|
1 # RUN: llc --mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-fold-operands,si-shrink-instructions %s -o - | FileCheck %s
|
|
2 --- |
|
|
; IR stub paired with the MIR function of the same name. The MIR memory
; operands refer to `ptr addrspace(1) undef`, the pointer used by these
; volatile accesses. %f16.val1 is loaded here but has no user.
3 define amdgpu_kernel void @add_f32_1.0_one_f16_use() #0 {
|
252
|
4 %f16.val0 = load volatile half, ptr addrspace(1) undef
|
|
5 %f16.val1 = load volatile half, ptr addrspace(1) undef
|
|
6 %f32.val = load volatile float, ptr addrspace(1) undef
|
150
|
; 0xH3C00 is the half-precision bit pattern used for 1.0 in this file.
7 %f16.add0 = fadd half %f16.val0, 0xH3C00
|
|
8 %f32.add = fadd float %f32.val, 1.000000e+00
|
252
|
9 store volatile half %f16.add0, ptr addrspace(1) undef
|
|
10 store volatile float %f32.add, ptr addrspace(1) undef
|
150
|
11 ret void
|
|
12 }
|
|
13
|
|
; IR stub paired with the MIR function of the same name; it supplies the
; `ptr addrspace(1) undef` pointer named by the MIR memory operands.
; The body is the same as @add_f32_1.0_one_f16_use: it contains a single
; half add, while the MIR body contains two V_ADD_F16 instructions.
14 define amdgpu_kernel void @add_f32_1.0_multi_f16_use() #0 {
|
252
|
15 %f16.val0 = load volatile half, ptr addrspace(1) undef
|
|
16 %f16.val1 = load volatile half, ptr addrspace(1) undef
|
|
17 %f32.val = load volatile float, ptr addrspace(1) undef
|
150
|
18 %f16.add0 = fadd half %f16.val0, 0xH3C00
|
|
19 %f32.add = fadd float %f32.val, 1.000000e+00
|
252
|
20 store volatile half %f16.add0, ptr addrspace(1) undef
|
|
21 store volatile float %f32.add, ptr addrspace(1) undef
|
150
|
22 ret void
|
|
23 }
|
|
24
|
|
; IR stub paired with the MIR function of the same name: one half add and
; one float add, each adding 1.0, with volatile loads and stores through
; `ptr addrspace(1) undef`. %f16.val1 is loaded but has no user.
25 define amdgpu_kernel void @add_f32_1.0_one_f32_use_one_f16_use () #0 {
|
252
|
26 %f16.val0 = load volatile half, ptr addrspace(1) undef
|
|
27 %f16.val1 = load volatile half, ptr addrspace(1) undef
|
|
28 %f32.val = load volatile float, ptr addrspace(1) undef
|
150
|
29 %f16.add0 = fadd half %f16.val0, 0xH3C00
|
|
30 %f32.add = fadd float %f32.val, 1.000000e+00
|
252
|
31 store volatile half %f16.add0, ptr addrspace(1) undef
|
|
32 store volatile float %f32.add, ptr addrspace(1) undef
|
150
|
33 ret void
|
|
34 }
|
|
35
|
|
; IR stub paired with the MIR function of the same name: two half adds and
; one float add, each adding 1.0, with volatile loads and stores through
; `ptr addrspace(1) undef`.
36 define amdgpu_kernel void @add_f32_1.0_one_f32_use_multi_f16_use () #0 {
|
252
|
37 %f16.val0 = load volatile half, ptr addrspace(1) undef
|
|
38 %f16.val1 = load volatile half, ptr addrspace(1) undef
|
|
39 %f32.val = load volatile float, ptr addrspace(1) undef
|
150
|
40 %f16.add0 = fadd half %f16.val0, 0xH3C00
|
|
41 %f16.add1 = fadd half %f16.val1, 0xH3C00
|
|
42 %f32.add = fadd float %f32.val, 1.000000e+00
|
252
|
43 store volatile half %f16.add0, ptr addrspace(1) undef
|
|
44 store volatile half %f16.add1, ptr addrspace(1) undef
|
|
45 store volatile float %f32.add, ptr addrspace(1) undef
|
150
|
46 ret void
|
|
47 }
|
|
48
|
|
; IR stub paired with the MIR function of the same name: two half adds
; whose constant operand has the raw bit pattern 0x0001 (integer 1), which
; corresponds to the `V_MOV_B32_e32 1` in the MIR body.
49 define amdgpu_kernel void @add_i32_1_multi_f16_use() #0 {
|
252
|
50 %f16.val0 = load volatile half, ptr addrspace(1) undef
|
|
51 %f16.val1 = load volatile half, ptr addrspace(1) undef
|
150
|
52 %f16.add0 = fadd half %f16.val0, 0xH0001
|
|
53 %f16.add1 = fadd half %f16.val1, 0xH0001
|
252
|
54 store volatile half %f16.add0, ptr addrspace(1) undef
|
|
55 store volatile half %f16.add1,ptr addrspace(1) undef
|
150
|
56 ret void
|
|
57 }
|
|
58
|
|
; IR stub paired with the MIR function of the same name: two half adds and
; one float add whose constant operands are written as raw bit patterns
; (0xHFFFE for half), corresponding to the `V_MOV_B32_e32 -2` in the MIR
; body.
59 define amdgpu_kernel void @add_i32_m2_one_f32_use_multi_f16_use () #0 {
|
252
|
60 %f16.val0 = load volatile half, ptr addrspace(1) undef
|
|
61 %f16.val1 = load volatile half, ptr addrspace(1) undef
|
|
62 %f32.val = load volatile float, ptr addrspace(1) undef
|
150
|
63 %f16.add0 = fadd half %f16.val0, 0xHFFFE
|
|
64 %f16.add1 = fadd half %f16.val1, 0xHFFFE
|
|
; Float constant given in LLVM's 64-bit hexadecimal form.
65 %f32.add = fadd float %f32.val, 0xffffffffc0000000
|
252
|
66 store volatile half %f16.add0, ptr addrspace(1) undef
|
|
67 store volatile half %f16.add1, ptr addrspace(1) undef
|
|
68 store volatile float %f32.add, ptr addrspace(1) undef
|
150
|
69 ret void
|
|
70 }
|
|
71
|
|
; IR stub paired with the MIR function of the same name: two float adds
; with volatile loads and stores through `ptr addrspace(1) undef`.
; The IR adds f32 1.0, whereas the MIR body moves 15360 (0x3C00, the f16
; bit pattern used for 1.0 elsewhere in this file). %f32.val is loaded but
; has no user.
72 define amdgpu_kernel void @add_f16_1.0_multi_f32_use() #0 {
|
252
|
73 %f32.val0 = load volatile float, ptr addrspace(1) undef
|
|
74 %f32.val1 = load volatile float, ptr addrspace(1) undef
|
|
75 %f32.val = load volatile float, ptr addrspace(1) undef
|
150
|
76 %f32.add0 = fadd float %f32.val0, 1.0
|
|
77 %f32.add1 = fadd float %f32.val1, 1.0
|
252
|
78 store volatile float %f32.add0, ptr addrspace(1) undef
|
|
79 store volatile float %f32.add1, ptr addrspace(1) undef
|
150
|
80 ret void
|
|
81 }
|
|
82
|
|
; IR stub paired with the MIR function of the same name: two half adds
; with volatile loads and stores through `ptr addrspace(1) undef`.
; Despite their names, %f32.val and %f32.add are half values here.
; %f16.val1 is loaded but has no user.
83 define amdgpu_kernel void @add_f16_1.0_other_high_bits_multi_f16_use() #0 {
|
252
|
84 %f16.val0 = load volatile half, ptr addrspace(1) undef
|
|
85 %f16.val1 = load volatile half, ptr addrspace(1) undef
|
|
86 %f32.val = load volatile half, ptr addrspace(1) undef
|
150
|
87 %f16.add0 = fadd half %f16.val0, 0xH3C00
|
|
88 %f32.add = fadd half %f32.val, 1.000000e+00
|
252
|
89 store volatile half %f16.add0, ptr addrspace(1) undef
|
|
90 store volatile half %f32.add, ptr addrspace(1) undef
|
150
|
91 ret void
|
|
92 }
|
|
93
|
|
; IR stub paired with the MIR function of the same name, providing the
; `ptr addrspace(1) undef` pointer named by the MIR memory operands.
; Every value here is half (including %f32.val and %f32.add), whereas the
; MIR body performs one 32-bit load/add/store and one 16-bit load/add/store.
; %f16.val1 is loaded but has no user.
94 define amdgpu_kernel void @add_f16_1.0_other_high_bits_use_f16_f32() #0 {
|
252
|
95 %f16.val0 = load volatile half, ptr addrspace(1) undef
|
|
96 %f16.val1 = load volatile half, ptr addrspace(1) undef
|
|
97 %f32.val = load volatile half, ptr addrspace(1) undef
|
150
|
98 %f16.add0 = fadd half %f16.val0, 0xH3C00
|
|
99 %f32.add = fadd half %f32.val, 1.000000e+00
|
252
|
100 store volatile half %f16.add0, ptr addrspace(1) undef
|
|
101 store volatile half %f32.add, ptr addrspace(1) undef
|
150
|
102 ret void
|
|
103 }
|
|
104
|
|
105 attributes #0 = { nounwind }
|
|
106
|
|
107 ...
|
|
108 ---
|
|
109
|
|
110 # f32 1.0 (1065353216 = 0x3F800000) with a single f16 use should be
|
|
111 # folded into that use as a 32-bit literal constant.
|
|
112
|
|
113 # CHECK-LABEL: name: add_f32_1.0_one_f16_use
|
221
|
114 # CHECK: %13:vgpr_32 = V_ADD_F16_e32 1065353216, killed %11, implicit $mode, implicit $exec
|
150
|
115
|
|
116 name: add_f32_1.0_one_f16_use
|
|
117 alignment: 1
|
|
118 exposesReturnsTwice: false
|
|
119 legalized: false
|
|
120 regBankSelected: false
|
|
121 selected: false
|
|
122 tracksRegLiveness: true
|
|
123 registers:
|
|
124 - { id: 0, class: sreg_64 }
|
|
125 - { id: 1, class: sreg_32 }
|
|
126 - { id: 2, class: sgpr_32 }
|
|
127 - { id: 3, class: vgpr_32 }
|
|
128 - { id: 4, class: sreg_64 }
|
|
129 - { id: 5, class: sreg_32 }
|
|
130 - { id: 6, class: sreg_64 }
|
|
131 - { id: 7, class: sreg_32 }
|
|
132 - { id: 8, class: sreg_32 }
|
|
133 - { id: 9, class: sreg_32 }
|
|
134 - { id: 10, class: sgpr_128 }
|
|
135 - { id: 11, class: vgpr_32 }
|
|
136 - { id: 12, class: vgpr_32 }
|
|
137 - { id: 13, class: vgpr_32 }
|
|
138 frameInfo:
|
|
139 isFrameAddressTaken: false
|
|
140 isReturnAddressTaken: false
|
|
141 hasStackMap: false
|
|
142 hasPatchPoint: false
|
|
143 stackSize: 0
|
|
144 offsetAdjustment: 0
|
|
145 maxAlignment: 0
|
|
146 adjustsStack: false
|
|
147 hasCalls: false
|
|
148 maxCallFrameSize: 0
|
|
149 hasOpaqueSPAdjustment: false
|
|
150 hasVAStart: false
|
|
151 hasMustTailInVarArgFunc: false
|
|
152 body: |
|
|
153 bb.0 (%ir-block.0):
|
|
; %5, %7, %8 and %9 are combined by the REG_SEQUENCE into %10, the
; sgpr_128 value passed to the buffer load and store below.
154 %4 = IMPLICIT_DEF
|
|
155 %5 = COPY %4.sub1
|
|
156 %6 = IMPLICIT_DEF
|
|
157 %7 = COPY %6.sub0
|
|
158 %8 = S_MOV_B32 61440
|
|
159 %9 = S_MOV_B32 -1
|
|
160 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
252
|
161 %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
|
150
|
; 1065353216 = 0x3F800000, the f32 bit pattern of 1.0. %12 has exactly
; one use, the f16 add below.
162 %12 = V_MOV_B32_e32 1065353216, implicit $exec
|
221
|
; The CHECK line for this function expects this add rewritten to
; V_ADD_F16_e32 with 1065353216 in place of %12.
163 %13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $mode, implicit $exec
|
252
|
164 BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
|
150
|
165 S_ENDPGM 0
|
|
166
|
|
167 ...
|
|
168 ---
|
|
169 # f32 1.0 materialized once and used by two f16 adds: the checks below
|
|
170 # expect it folded into each f16 use as the 32-bit literal 1065353216.
|
|
171
|
|
172 # CHECK-LABEL: name: add_f32_1.0_multi_f16_use
|
236
|
173 # CHECK: %14:vgpr_32 = V_ADD_F16_e32 1065353216, killed %11, implicit $mode, implicit $exec
|
|
174 # CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, killed %12, implicit $mode, implicit $exec
|
150
|
175
|
|
176
|
|
177 name: add_f32_1.0_multi_f16_use
|
|
178 alignment: 1
|
|
179 exposesReturnsTwice: false
|
|
180 legalized: false
|
|
181 regBankSelected: false
|
|
182 selected: false
|
|
183 tracksRegLiveness: true
|
|
184 registers:
|
|
185 - { id: 0, class: sreg_64 }
|
|
186 - { id: 1, class: sreg_32 }
|
|
187 - { id: 2, class: sgpr_32 }
|
|
188 - { id: 3, class: vgpr_32 }
|
|
189 - { id: 4, class: sreg_64 }
|
|
190 - { id: 5, class: sreg_32 }
|
|
191 - { id: 6, class: sreg_64 }
|
|
192 - { id: 7, class: sreg_32 }
|
|
193 - { id: 8, class: sreg_32 }
|
|
194 - { id: 9, class: sreg_32 }
|
|
195 - { id: 10, class: sgpr_128 }
|
|
196 - { id: 11, class: vgpr_32 }
|
|
197 - { id: 12, class: vgpr_32 }
|
|
198 - { id: 13, class: vgpr_32 }
|
|
199 - { id: 14, class: vgpr_32 }
|
|
200 - { id: 15, class: vgpr_32 }
|
|
201 frameInfo:
|
|
202 isFrameAddressTaken: false
|
|
203 isReturnAddressTaken: false
|
|
204 hasStackMap: false
|
|
205 hasPatchPoint: false
|
|
206 stackSize: 0
|
|
207 offsetAdjustment: 0
|
|
208 maxAlignment: 0
|
|
209 adjustsStack: false
|
|
210 hasCalls: false
|
|
211 maxCallFrameSize: 0
|
|
212 hasOpaqueSPAdjustment: false
|
|
213 hasVAStart: false
|
|
214 hasMustTailInVarArgFunc: false
|
|
215 body: |
|
|
216 bb.0 (%ir-block.0):
|
|
; %5, %7, %8 and %9 are combined by the REG_SEQUENCE into %10, the
; sgpr_128 value passed to every buffer load and store below.
217 %4 = IMPLICIT_DEF
|
|
218 %5 = COPY %4.sub1
|
|
219 %6 = IMPLICIT_DEF
|
|
220 %7 = COPY %6.sub0
|
|
221 %8 = S_MOV_B32 61440
|
|
222 %9 = S_MOV_B32 -1
|
|
223 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
252
|
224 %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
|
|
; Note: the second f16 add operand is produced by a 32-bit (DWORD) load.
225 %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
|
150
|
; 1065353216 = 0x3F800000, the f32 bit pattern of 1.0. %13 is used by
; both f16 adds below; the second use carries the kill flag.
226 %13 = V_MOV_B32_e32 1065353216, implicit $exec
|
221
|
227 %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $mode, implicit $exec
|
|
228 %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $mode, implicit $exec
|
252
|
229 BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
|
|
230 BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
|
150
|
231 S_ENDPGM 0
|
|
232
|
|
233 ...
|
|
234 ---
|
|
235
|
|
236 # f32 1.0 should be folded into the single f32 use as an inline
|
|
237 # immediate, and folded into the single f16 use as a literal constant
|
|
238
|
|
239 # CHECK-LABEL: name: add_f32_1.0_one_f32_use_one_f16_use
|
221
|
240 # CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, %11, implicit $mode, implicit $exec
|
|
241 # CHECK: %16:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $mode, implicit $exec
|
150
|
242
|
|
243 name: add_f32_1.0_one_f32_use_one_f16_use
|
|
244 alignment: 1
|
|
245 exposesReturnsTwice: false
|
|
246 legalized: false
|
|
247 regBankSelected: false
|
|
248 selected: false
|
|
249 tracksRegLiveness: true
|
|
250 registers:
|
|
251 - { id: 0, class: sreg_64 }
|
|
252 - { id: 1, class: sreg_32 }
|
|
253 - { id: 2, class: sgpr_32 }
|
|
254 - { id: 3, class: vgpr_32 }
|
|
255 - { id: 4, class: sreg_64 }
|
|
256 - { id: 5, class: sreg_32 }
|
|
257 - { id: 6, class: sreg_64 }
|
|
258 - { id: 7, class: sreg_32 }
|
|
259 - { id: 8, class: sreg_32 }
|
|
260 - { id: 9, class: sreg_32 }
|
|
261 - { id: 10, class: sgpr_128 }
|
|
262 - { id: 11, class: vgpr_32 }
|
|
263 - { id: 12, class: vgpr_32 }
|
|
264 - { id: 13, class: vgpr_32 }
|
|
265 - { id: 14, class: vgpr_32 }
|
|
266 - { id: 15, class: vgpr_32 }
|
|
267 - { id: 16, class: vgpr_32 }
|
|
268 frameInfo:
|
|
269 isFrameAddressTaken: false
|
|
270 isReturnAddressTaken: false
|
|
271 hasStackMap: false
|
|
272 hasPatchPoint: false
|
|
273 stackSize: 0
|
|
274 offsetAdjustment: 0
|
|
275 maxAlignment: 0
|
|
276 adjustsStack: false
|
|
277 hasCalls: false
|
|
278 maxCallFrameSize: 0
|
|
279 hasOpaqueSPAdjustment: false
|
|
280 hasVAStart: false
|
|
281 hasMustTailInVarArgFunc: false
|
|
282 body: |
|
|
283 bb.0 (%ir-block.0):
|
|
; %5, %7, %8 and %9 are combined by the REG_SEQUENCE into %10, the
; sgpr_128 value passed to every buffer load and store below.
284 %4 = IMPLICIT_DEF
|
|
285 %5 = COPY %4.sub1
|
|
286 %6 = IMPLICIT_DEF
|
|
287 %7 = COPY %6.sub0
|
|
288 %8 = S_MOV_B32 61440
|
|
289 %9 = S_MOV_B32 -1
|
|
290 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
252
|
291 %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
|
|
; %12 is loaded but has no user in this body.
292 %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
|
|
293 %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
|
150
|
; 1065353216 = 0x3F800000, the f32 bit pattern of 1.0. %14 feeds one f16
; add and one f32 add; the f32 add carries the kill flag.
294 %14 = V_MOV_B32_e32 1065353216, implicit $exec
|
221
|
295 %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
|
|
296 %16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
|
252
|
297 BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
|
|
298 BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
|
150
|
299 S_ENDPGM 0
|
|
300
|
|
301 ...
|
|
302 ---
|
|
303
|
|
304 # f32 1.0 should be folded into the single f32 use as an inline constant;
|
|
305 # the checks below also expect it folded into both f16 uses as a literal.
|
|
306
|
|
307 # CHECK-LABEL: name: add_f32_1.0_one_f32_use_multi_f16_use
|
236
|
308 # CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, %11, implicit $mode, implicit $exec
|
|
309 # CHECK: %16:vgpr_32 = V_ADD_F16_e32 1065353216, %12, implicit $mode, implicit $exec
|
221
|
310 # CHECK: %17:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $mode, implicit $exec
|
150
|
311
|
|
312 name: add_f32_1.0_one_f32_use_multi_f16_use
|
|
313 alignment: 1
|
|
314 exposesReturnsTwice: false
|
|
315 legalized: false
|
|
316 regBankSelected: false
|
|
317 selected: false
|
|
318 tracksRegLiveness: true
|
|
319 registers:
|
|
320 - { id: 0, class: sreg_64 }
|
|
321 - { id: 1, class: sreg_32 }
|
|
322 - { id: 2, class: sgpr_32 }
|
|
323 - { id: 3, class: vgpr_32 }
|
|
324 - { id: 4, class: sreg_64 }
|
|
325 - { id: 5, class: sreg_32 }
|
|
326 - { id: 6, class: sreg_64 }
|
|
327 - { id: 7, class: sreg_32 }
|
|
328 - { id: 8, class: sreg_32 }
|
|
329 - { id: 9, class: sreg_32 }
|
|
330 - { id: 10, class: sgpr_128 }
|
|
331 - { id: 11, class: vgpr_32 }
|
|
332 - { id: 12, class: vgpr_32 }
|
|
333 - { id: 13, class: vgpr_32 }
|
|
334 - { id: 14, class: vgpr_32 }
|
|
335 - { id: 15, class: vgpr_32 }
|
|
336 - { id: 16, class: vgpr_32 }
|
|
337 - { id: 17, class: vgpr_32 }
|
|
338 frameInfo:
|
|
339 isFrameAddressTaken: false
|
|
340 isReturnAddressTaken: false
|
|
341 hasStackMap: false
|
|
342 hasPatchPoint: false
|
|
343 stackSize: 0
|
|
344 offsetAdjustment: 0
|
|
345 maxAlignment: 0
|
|
346 adjustsStack: false
|
|
347 hasCalls: false
|
|
348 maxCallFrameSize: 0
|
|
349 hasOpaqueSPAdjustment: false
|
|
350 hasVAStart: false
|
|
351 hasMustTailInVarArgFunc: false
|
|
352 body: |
|
|
353 bb.0 (%ir-block.0):
|
|
; %5, %7, %8 and %9 are combined by the REG_SEQUENCE into %10, the
; sgpr_128 value passed to every buffer load and store below.
354 %4 = IMPLICIT_DEF
|
|
355 %5 = COPY %4.sub1
|
|
356 %6 = IMPLICIT_DEF
|
|
357 %7 = COPY %6.sub0
|
|
358 %8 = S_MOV_B32 61440
|
|
359 %9 = S_MOV_B32 -1
|
|
360 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
252
|
361 %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
|
|
362 %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
|
|
363 %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
|
150
|
; 1065353216 = 0x3F800000, the f32 bit pattern of 1.0. %14 feeds two f16
; adds and one f32 add; the f32 add (last use) carries the kill flag.
364 %14 = V_MOV_B32_e32 1065353216, implicit $exec
|
221
|
365 %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
|
|
366 %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $mode, implicit $exec
|
|
367 %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
|
252
|
368 BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
|
|
369 BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
|
|
370 BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
|
150
|
371 S_ENDPGM 0
|
|
372
|
|
373 ...
|
|
374 ---
|
|
375 # CHECK-LABEL: name: add_i32_1_multi_f16_use
|
221
|
376 # CHECK: %14:vgpr_32 = V_ADD_F16_e32 1, killed %11, implicit $mode, implicit $exec
|
|
377 # CHECK: %15:vgpr_32 = V_ADD_F16_e32 1, killed %12, implicit $mode, implicit $exec
|
150
|
378
|
|
379
|
|
380 name: add_i32_1_multi_f16_use
|
|
381 alignment: 1
|
|
382 exposesReturnsTwice: false
|
|
383 legalized: false
|
|
384 regBankSelected: false
|
|
385 selected: false
|
|
386 tracksRegLiveness: true
|
|
387 registers:
|
|
388 - { id: 0, class: sreg_64 }
|
|
389 - { id: 1, class: sreg_32 }
|
|
390 - { id: 2, class: sgpr_32 }
|
|
391 - { id: 3, class: vgpr_32 }
|
|
392 - { id: 4, class: sreg_64 }
|
|
393 - { id: 5, class: sreg_32 }
|
|
394 - { id: 6, class: sreg_64 }
|
|
395 - { id: 7, class: sreg_32 }
|
|
396 - { id: 8, class: sreg_32 }
|
|
397 - { id: 9, class: sreg_32 }
|
|
398 - { id: 10, class: sgpr_128 }
|
|
399 - { id: 11, class: vgpr_32 }
|
|
400 - { id: 12, class: vgpr_32 }
|
|
401 - { id: 13, class: vgpr_32 }
|
|
402 - { id: 14, class: vgpr_32 }
|
|
403 - { id: 15, class: vgpr_32 }
|
|
404 frameInfo:
|
|
405 isFrameAddressTaken: false
|
|
406 isReturnAddressTaken: false
|
|
407 hasStackMap: false
|
|
408 hasPatchPoint: false
|
|
409 stackSize: 0
|
|
410 offsetAdjustment: 0
|
|
411 maxAlignment: 0
|
|
412 adjustsStack: false
|
|
413 hasCalls: false
|
|
414 maxCallFrameSize: 0
|
|
415 hasOpaqueSPAdjustment: false
|
|
416 hasVAStart: false
|
|
417 hasMustTailInVarArgFunc: false
|
|
418 body: |
|
|
419 bb.0 (%ir-block.0):
|
|
; %5, %7, %8 and %9 are combined by the REG_SEQUENCE into %10, the
; sgpr_128 value passed to every buffer load and store below.
420 %4 = IMPLICIT_DEF
|
|
421 %5 = COPY %4.sub1
|
|
422 %6 = IMPLICIT_DEF
|
|
423 %7 = COPY %6.sub0
|
|
424 %8 = S_MOV_B32 61440
|
|
425 %9 = S_MOV_B32 -1
|
|
426 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
252
|
427 %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
|
|
; Note: the second f16 add operand is produced by a 32-bit (DWORD) load.
428 %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
|
150
|
; Integer 1, used by both f16 adds below. The CHECK lines for this
; function expect V_ADD_F16_e32 with 1 in place of %13 in each add.
429 %13 = V_MOV_B32_e32 1, implicit $exec
|
221
|
430 %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $mode, implicit $exec
|
|
431 %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $mode, implicit $exec
|
252
|
432 BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
|
|
433 BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
|
150
|
434 S_ENDPGM 0
|
|
435
|
|
436 ...
|
|
437 ---
|
|
438
|
|
439 # CHECK-LABEL: name: add_i32_m2_one_f32_use_multi_f16_use
|
221
|
440 # CHECK: %15:vgpr_32 = V_ADD_F16_e32 -2, %11, implicit $mode, implicit $exec
|
|
441 # CHECK: %16:vgpr_32 = V_ADD_F16_e32 -2, %12, implicit $mode, implicit $exec
|
|
442 # CHECK: %17:vgpr_32 = V_ADD_F32_e32 -2, killed %13, implicit $mode, implicit $exec
|
150
|
443
|
|
444 name: add_i32_m2_one_f32_use_multi_f16_use
|
|
445 alignment: 1
|
|
446 exposesReturnsTwice: false
|
|
447 legalized: false
|
|
448 regBankSelected: false
|
|
449 selected: false
|
|
450 tracksRegLiveness: true
|
|
451 registers:
|
|
452 - { id: 0, class: sreg_64 }
|
|
453 - { id: 1, class: sreg_32 }
|
|
454 - { id: 2, class: sgpr_32 }
|
|
455 - { id: 3, class: vgpr_32 }
|
|
456 - { id: 4, class: sreg_64 }
|
|
457 - { id: 5, class: sreg_32 }
|
|
458 - { id: 6, class: sreg_64 }
|
|
459 - { id: 7, class: sreg_32 }
|
|
460 - { id: 8, class: sreg_32 }
|
|
461 - { id: 9, class: sreg_32 }
|
|
462 - { id: 10, class: sgpr_128 }
|
|
463 - { id: 11, class: vgpr_32 }
|
|
464 - { id: 12, class: vgpr_32 }
|
|
465 - { id: 13, class: vgpr_32 }
|
|
466 - { id: 14, class: vgpr_32 }
|
|
467 - { id: 15, class: vgpr_32 }
|
|
468 - { id: 16, class: vgpr_32 }
|
|
469 - { id: 17, class: vgpr_32 }
|
|
470 frameInfo:
|
|
471 isFrameAddressTaken: false
|
|
472 isReturnAddressTaken: false
|
|
473 hasStackMap: false
|
|
474 hasPatchPoint: false
|
|
475 stackSize: 0
|
|
476 offsetAdjustment: 0
|
|
477 maxAlignment: 0
|
|
478 adjustsStack: false
|
|
479 hasCalls: false
|
|
480 maxCallFrameSize: 0
|
|
481 hasOpaqueSPAdjustment: false
|
|
482 hasVAStart: false
|
|
483 hasMustTailInVarArgFunc: false
|
|
484 body: |
|
|
485 bb.0 (%ir-block.0):
|
|
; %5, %7, %8 and %9 are combined by the REG_SEQUENCE into %10, the
; sgpr_128 value passed to every buffer load and store below.
486 %4 = IMPLICIT_DEF
|
|
487 %5 = COPY %4.sub1
|
|
488 %6 = IMPLICIT_DEF
|
|
489 %7 = COPY %6.sub0
|
|
490 %8 = S_MOV_B32 61440
|
|
491 %9 = S_MOV_B32 -1
|
|
492 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
252
|
493 %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
|
|
494 %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
|
|
495 %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
|
150
|
; Integer -2, used by two f16 adds and one f32 add. The CHECK lines for
; this function expect -2 in place of %14 in all three adds.
496 %14 = V_MOV_B32_e32 -2, implicit $exec
|
221
|
497 %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
|
|
498 %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $mode, implicit $exec
|
|
499 %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
|
252
|
500 BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
|
|
501 BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
|
|
502 BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
|
150
|
503 S_ENDPGM 0
|
|
504
|
|
505 ...
|
|
506 ---
|
|
507
|
|
508 # f16 1.0 (15360 = 0x3C00) is used by two f32 adds: the checks below
|
|
509 # expect it folded into each f32 use as the literal 15360.
|
|
510
|
|
511 # CHECK-LABEL: name: add_f16_1.0_multi_f32_use
|
236
|
512 # CHECK: %14:vgpr_32 = V_ADD_F32_e32 15360, %11, implicit $mode, implicit $exec
|
|
513 # CHECK: %15:vgpr_32 = V_ADD_F32_e32 15360, %12, implicit $mode, implicit $exec
|
150
|
514
|
|
515 name: add_f16_1.0_multi_f32_use
|
|
516 alignment: 1
|
|
517 exposesReturnsTwice: false
|
|
518 legalized: false
|
|
519 regBankSelected: false
|
|
520 selected: false
|
|
521 tracksRegLiveness: true
|
|
522 registers:
|
|
523 - { id: 0, class: sreg_64 }
|
|
524 - { id: 1, class: sreg_32 }
|
|
525 - { id: 2, class: sgpr_32 }
|
|
526 - { id: 3, class: vgpr_32 }
|
|
527 - { id: 4, class: sreg_64 }
|
|
528 - { id: 5, class: sreg_32 }
|
|
529 - { id: 6, class: sreg_64 }
|
|
530 - { id: 7, class: sreg_32 }
|
|
531 - { id: 8, class: sreg_32 }
|
|
532 - { id: 9, class: sreg_32 }
|
|
533 - { id: 10, class: sgpr_128 }
|
|
534 - { id: 11, class: vgpr_32 }
|
|
535 - { id: 12, class: vgpr_32 }
|
|
536 - { id: 13, class: vgpr_32 }
|
|
537 - { id: 14, class: vgpr_32 }
|
|
538 - { id: 15, class: vgpr_32 }
|
|
539 frameInfo:
|
|
540 isFrameAddressTaken: false
|
|
541 isReturnAddressTaken: false
|
|
542 hasStackMap: false
|
|
543 hasPatchPoint: false
|
|
544 stackSize: 0
|
|
545 offsetAdjustment: 0
|
|
546 maxAlignment: 0
|
|
547 adjustsStack: false
|
|
548 hasCalls: false
|
|
549 maxCallFrameSize: 0
|
|
550 hasOpaqueSPAdjustment: false
|
|
551 hasVAStart: false
|
|
552 hasMustTailInVarArgFunc: false
|
|
553 body: |
|
|
554 bb.0 (%ir-block.0):
|
|
; %5, %7, %8 and %9 are combined by the REG_SEQUENCE into %10, the
; sgpr_128 value passed to every buffer load and store below.
555 %4 = IMPLICIT_DEF
|
|
556 %5 = COPY %4.sub1
|
|
557 %6 = IMPLICIT_DEF
|
|
558 %7 = COPY %6.sub0
|
|
559 %8 = S_MOV_B32 61440
|
|
560 %9 = S_MOV_B32 -1
|
|
561 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
252
|
562 %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
|
|
563 %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
|
150
|
; 15360 = 0x3C00, the f16 bit pattern written as 0xH3C00 in the IR stubs.
; %13 is used by both f32 adds below; neither use carries a kill flag.
564 %13 = V_MOV_B32_e32 15360, implicit $exec
|
221
|
565 %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
|
|
566 %15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
|
252
|
567 BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
|
|
568 BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
|
150
|
569 S_ENDPGM 0
|
|
570
|
|
571 ...
|
|
572 ---
|
|
573
|
|
574 # The low 16 bits (0x3C00) are an f16 inline immediate, but the high bits are junk; the checks below expect the full value 80886784 folded as a literal.
|
|
575
|
|
576 # CHECK-LABEL: name: add_f16_1.0_other_high_bits_multi_f16_use
|
236
|
577 # CHECK: %14:vgpr_32 = V_ADD_F16_e32 80886784, %11, implicit $mode, implicit $exec
|
|
578 # CHECK: %15:vgpr_32 = V_ADD_F16_e32 80886784, %12, implicit $mode, implicit $exec
|
150
|
579
|
|
580 name: add_f16_1.0_other_high_bits_multi_f16_use
|
|
581 alignment: 1
|
|
582 exposesReturnsTwice: false
|
|
583 legalized: false
|
|
584 regBankSelected: false
|
|
585 selected: false
|
|
586 tracksRegLiveness: true
|
|
587 registers:
|
|
588 - { id: 0, class: sreg_64 }
|
|
589 - { id: 1, class: sreg_32 }
|
|
590 - { id: 2, class: sgpr_32 }
|
|
591 - { id: 3, class: vgpr_32 }
|
|
592 - { id: 4, class: sreg_64 }
|
|
593 - { id: 5, class: sreg_32 }
|
|
594 - { id: 6, class: sreg_64 }
|
|
595 - { id: 7, class: sreg_32 }
|
|
596 - { id: 8, class: sreg_32 }
|
|
597 - { id: 9, class: sreg_32 }
|
|
598 - { id: 10, class: sgpr_128 }
|
|
599 - { id: 11, class: vgpr_32 }
|
|
600 - { id: 12, class: vgpr_32 }
|
|
601 - { id: 13, class: vgpr_32 }
|
|
602 - { id: 14, class: vgpr_32 }
|
|
603 - { id: 15, class: vgpr_32 }
|
|
604 frameInfo:
|
|
605 isFrameAddressTaken: false
|
|
606 isReturnAddressTaken: false
|
|
607 hasStackMap: false
|
|
608 hasPatchPoint: false
|
|
609 stackSize: 0
|
|
610 offsetAdjustment: 0
|
|
611 maxAlignment: 0
|
|
612 adjustsStack: false
|
|
613 hasCalls: false
|
|
614 maxCallFrameSize: 0
|
|
615 hasOpaqueSPAdjustment: false
|
|
616 hasVAStart: false
|
|
617 hasMustTailInVarArgFunc: false
|
|
618 body: |
|
|
619 bb.0 (%ir-block.0):
|
|
; %5, %7, %8 and %9 are combined by the REG_SEQUENCE into %10, the
; sgpr_128 value passed to every buffer load and store below.
620 %4 = IMPLICIT_DEF
|
|
621 %5 = COPY %4.sub1
|
|
622 %6 = IMPLICIT_DEF
|
|
623 %7 = COPY %6.sub0
|
|
624 %8 = S_MOV_B32 61440
|
|
625 %9 = S_MOV_B32 -1
|
|
626 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
252
|
627 %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
|
|
628 %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
|
150
|
; 80886784 = 0x04D23C00: low 16 bits are 0x3C00, high 16 bits are 0x04D2.
; %13 is used by both f16 adds below.
629 %13 = V_MOV_B32_e32 80886784, implicit $exec
|
221
|
630 %14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
|
|
631 %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
|
252
|
632 BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
|
|
633 BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
|
150
|
634 S_ENDPGM 0
|
|
635
|
|
636 ...
|
|
637 ---
|
|
638
|
|
639 # CHECK-LABEL: name: add_f16_1.0_other_high_bits_use_f16_f32
|
236
|
640 # CHECK: %14:vgpr_32 = V_ADD_F32_e32 305413120, %11, implicit $mode, implicit $exec
|
|
641 # CHECK: %15:vgpr_32 = V_ADD_F16_e32 305413120, %12, implicit $mode, implicit $exec
|
150
|
642 name: add_f16_1.0_other_high_bits_use_f16_f32
|
|
643 alignment: 1
|
|
644 exposesReturnsTwice: false
|
|
645 legalized: false
|
|
646 regBankSelected: false
|
|
647 selected: false
|
|
648 tracksRegLiveness: true
|
|
649 registers:
|
|
650 - { id: 0, class: sreg_64 }
|
|
651 - { id: 1, class: sreg_32 }
|
|
652 - { id: 2, class: sgpr_32 }
|
|
653 - { id: 3, class: vgpr_32 }
|
|
654 - { id: 4, class: sreg_64 }
|
|
655 - { id: 5, class: sreg_32 }
|
|
656 - { id: 6, class: sreg_64 }
|
|
657 - { id: 7, class: sreg_32 }
|
|
658 - { id: 8, class: sreg_32 }
|
|
659 - { id: 9, class: sreg_32 }
|
|
660 - { id: 10, class: sgpr_128 }
|
|
661 - { id: 11, class: vgpr_32 }
|
|
662 - { id: 12, class: vgpr_32 }
|
|
663 - { id: 13, class: vgpr_32 }
|
|
664 - { id: 14, class: vgpr_32 }
|
|
665 - { id: 15, class: vgpr_32 }
|
|
666 frameInfo:
|
|
667 isFrameAddressTaken: false
|
|
668 isReturnAddressTaken: false
|
|
669 hasStackMap: false
|
|
670 hasPatchPoint: false
|
|
671 stackSize: 0
|
|
672 offsetAdjustment: 0
|
|
673 maxAlignment: 0
|
|
674 adjustsStack: false
|
|
675 hasCalls: false
|
|
676 maxCallFrameSize: 0
|
|
677 hasOpaqueSPAdjustment: false
|
|
678 hasVAStart: false
|
|
679 hasMustTailInVarArgFunc: false
|
|
680 body: |
|
|
681 bb.0 (%ir-block.0):
|
|
; %5, %7, %8 and %9 are combined by the REG_SEQUENCE into %10, the
; sgpr_128 value passed to every buffer load and store below.
682 %4 = IMPLICIT_DEF
|
|
683 %5 = COPY %4.sub1
|
|
684 %6 = IMPLICIT_DEF
|
|
685 %7 = COPY %6.sub0
|
|
686 %8 = S_MOV_B32 61440
|
|
687 %9 = S_MOV_B32 -1
|
|
688 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
252
|
689 %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
|
|
690 %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
|
150
|
; 305413120 = 0x12343C00: low 16 bits are 0x3C00, high 16 bits are 0x1234.
; %13 is used once by an f32 add and once by an f16 add.
691 %13 = V_MOV_B32_e32 305413120, implicit $exec
|
221
|
692 %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
|
|
693 %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
|
252
|
694 BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
|
|
695 BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
|
150
|
696 S_ENDPGM 0
|
|
697
|
|
698 ...
|