diff llvm/test/CodeGen/AMDGPU/imm.ll @ 236:c4bab56944e8 llvm-original
LLVM 16
| author | kono |
| --- | --- |
| date | Wed, 09 Nov 2022 17:45:10 +0900 |
| parents | 0572611fdcc8 |
| children | 1f2b6ac9f198 |
--- a/llvm/test/CodeGen/AMDGPU/imm.ll Wed Jul 21 10:27:27 2021 +0900
+++ b/llvm/test/CodeGen/AMDGPU/imm.ll Wed Nov 09 17:45:10 2022 +0900
@@ -423,24 +423,24 @@
 define amdgpu_kernel void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
 ; SI-LABEL: add_inline_imm_0.0_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_add_f32_e64 v0, s0, 0
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: v_add_f32_e64 v0, s4, 0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: add_inline_imm_0.0_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_add_f32_e64 v0, s0, 0
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: v_add_f32_e64 v0, s4, 0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %y = fadd float %x, 0.0
 store float %y, float addrspace(1)* %out
@@ -450,24 +450,24 @@
 define amdgpu_kernel void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
 ; SI-LABEL: add_inline_imm_0.5_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_add_f32_e64 v0, s0, 0.5
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: v_add_f32_e64 v0, s4, 0.5
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: add_inline_imm_0.5_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_add_f32_e64 v0, s0, 0.5
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: v_add_f32_e64 v0, s4, 0.5
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %y = fadd float %x, 0.5
 store float %y, float addrspace(1)* %out
@@ -477,24 +477,24 @@
 define amdgpu_kernel void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
 ; SI-LABEL: add_inline_imm_neg_0.5_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_add_f32_e64 v0, s0, -0.5
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: v_add_f32_e64 v0, s4, -0.5
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: add_inline_imm_neg_0.5_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_add_f32_e64 v0, s0, -0.5
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: v_add_f32_e64 v0, s4, -0.5
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %y = fadd float %x, -0.5
 store float %y, float addrspace(1)* %out
@@ -504,24 +504,24 @@
 define amdgpu_kernel void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
 ; SI-LABEL: add_inline_imm_1.0_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_add_f32_e64 v0, s0, 1.0
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: v_add_f32_e64 v0, s4, 1.0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: add_inline_imm_1.0_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_add_f32_e64 v0, s0, 1.0
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: v_add_f32_e64 v0, s4, 1.0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %y = fadd float %x, 1.0
 store float %y, float addrspace(1)* %out
@@ -531,24 +531,24 @@
 define amdgpu_kernel void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
 ; SI-LABEL: add_inline_imm_neg_1.0_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_add_f32_e64 v0, s0, -1.0
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: v_add_f32_e64 v0, s4, -1.0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: add_inline_imm_neg_1.0_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_add_f32_e64 v0, s0, -1.0
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: v_add_f32_e64 v0, s4, -1.0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %y = fadd float %x, -1.0
 store float %y, float addrspace(1)* %out
@@ -558,24 +558,24 @@
 define amdgpu_kernel void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
 ; SI-LABEL: add_inline_imm_2.0_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_add_f32_e64 v0, s0, 2.0
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: v_add_f32_e64 v0, s4, 2.0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: add_inline_imm_2.0_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_add_f32_e64 v0, s0, 2.0
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: v_add_f32_e64 v0, s4, 2.0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %y = fadd float %x, 2.0
 store float %y, float addrspace(1)* %out
@@ -585,24 +585,24 @@
 define amdgpu_kernel void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
 ; SI-LABEL: add_inline_imm_neg_2.0_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_add_f32_e64 v0, s0, -2.0
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: v_add_f32_e64 v0, s4, -2.0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: add_inline_imm_neg_2.0_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_add_f32_e64 v0, s0, -2.0
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: v_add_f32_e64 v0, s4, -2.0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %y = fadd float %x, -2.0
 store float %y, float addrspace(1)* %out
@@ -612,24 +612,24 @@
 define amdgpu_kernel void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
 ; SI-LABEL: add_inline_imm_4.0_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_add_f32_e64 v0, s0, 4.0
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: v_add_f32_e64 v0, s4, 4.0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: add_inline_imm_4.0_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_add_f32_e64 v0, s0, 4.0
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: v_add_f32_e64 v0, s4, 4.0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %y = fadd float %x, 4.0
 store float %y, float addrspace(1)* %out
@@ -639,24 +639,24 @@
 define amdgpu_kernel void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
 ; SI-LABEL: add_inline_imm_neg_4.0_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_add_f32_e64 v0, s0, -4.0
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: v_add_f32_e64 v0, s4, -4.0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: add_inline_imm_neg_4.0_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_add_f32_e64 v0, s0, -4.0
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: v_add_f32_e64 v0, s4, -4.0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %y = fadd float %x, -4.0
 store float %y, float addrspace(1)* %out
@@ -666,38 +666,38 @@
 define amdgpu_kernel void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
 ; SI-LABEL: commute_add_inline_imm_0.5_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
-; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_mov_b32 s0, s4
-; SI-NEXT: s_mov_b32 s1, s5
-; SI-NEXT: s_mov_b32 s4, s6
-; SI-NEXT: s_mov_b32 s5, s7
-; SI-NEXT: s_mov_b32 s6, s2
-; SI-NEXT: s_mov_b32 s7, s3
-; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
 ; SI-NEXT: s_waitcnt vmcnt(0)
 ; SI-NEXT: v_add_f32_e32 v0, 0.5, v0
-; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: commute_add_inline_imm_0.5_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
-; VI-NEXT: s_mov_b32 s3, 0xf000
-; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s7, 0xf000
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_mov_b32 s10, s6
+; VI-NEXT: s_mov_b32 s11, s7
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: s_mov_b32 s0, s4
-; VI-NEXT: s_mov_b32 s1, s5
-; VI-NEXT: s_mov_b32 s4, s6
-; VI-NEXT: s_mov_b32 s5, s7
-; VI-NEXT: s_mov_b32 s6, s2
-; VI-NEXT: s_mov_b32 s7, s3
-; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_mov_b32 s8, s2
+; VI-NEXT: s_mov_b32 s9, s3
+; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; VI-NEXT: s_mov_b32 s4, s0
+; VI-NEXT: s_mov_b32 s5, s1
 ; VI-NEXT: s_waitcnt vmcnt(0)
 ; VI-NEXT: v_add_f32_e32 v0, 0.5, v0
-; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
 %x = load float, float addrspace(1)* %in
 %y = fadd float %x, 0.5
@@ -708,38 +708,38 @@
 define amdgpu_kernel void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
 ; SI-LABEL: commute_add_literal_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
-; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_mov_b32 s0, s4
-; SI-NEXT: s_mov_b32 s1, s5
-; SI-NEXT: s_mov_b32 s4, s6
-; SI-NEXT: s_mov_b32 s5, s7
-; SI-NEXT: s_mov_b32 s6, s2
-; SI-NEXT: s_mov_b32 s7, s3
-; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
 ; SI-NEXT: s_waitcnt vmcnt(0)
 ; SI-NEXT: v_add_f32_e32 v0, 0x44800000, v0
-; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: commute_add_literal_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
-; VI-NEXT: s_mov_b32 s3, 0xf000
-; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s7, 0xf000
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_mov_b32 s10, s6
+; VI-NEXT: s_mov_b32 s11, s7
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: s_mov_b32 s0, s4
-; VI-NEXT: s_mov_b32 s1, s5
-; VI-NEXT: s_mov_b32 s4, s6
-; VI-NEXT: s_mov_b32 s5, s7
-; VI-NEXT: s_mov_b32 s6, s2
-; VI-NEXT: s_mov_b32 s7, s3
-; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_mov_b32 s8, s2
+; VI-NEXT: s_mov_b32 s9, s3
+; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; VI-NEXT: s_mov_b32 s4, s0
+; VI-NEXT: s_mov_b32 s5, s1
 ; VI-NEXT: s_waitcnt vmcnt(0)
 ; VI-NEXT: v_add_f32_e32 v0, 0x44800000, v0
-; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
 %x = load float, float addrspace(1)* %in
 %y = fadd float %x, 1024.0
@@ -750,24 +750,24 @@
 define amdgpu_kernel void @add_inline_imm_1_f32(float addrspace(1)* %out, float %x) {
 ; SI-LABEL: add_inline_imm_1_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_add_f32_e64 v0, s0, 1
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: v_add_f32_e64 v0, s4, 1
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: add_inline_imm_1_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_add_f32_e64 v0, s0, 1
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: v_add_f32_e64 v0, s4, 1
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %y = fadd float %x, 0x36a0000000000000
 store float %y, float addrspace(1)* %out
@@ -777,24 +777,24 @@
 define amdgpu_kernel void @add_inline_imm_2_f32(float addrspace(1)* %out, float %x) {
 ; SI-LABEL: add_inline_imm_2_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_add_f32_e64 v0, s0, 2
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: v_add_f32_e64 v0, s4, 2
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: add_inline_imm_2_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_add_f32_e64 v0, s0, 2
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: v_add_f32_e64 v0, s4, 2
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %y = fadd float %x, 0x36b0000000000000
 store float %y, float addrspace(1)* %out
@@ -804,24 +804,24 @@
 define amdgpu_kernel void @add_inline_imm_16_f32(float addrspace(1)* %out, float %x) {
 ; SI-LABEL: add_inline_imm_16_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_add_f32_e64 v0, s0, 16
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: v_add_f32_e64 v0, s4, 16
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: add_inline_imm_16_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_add_f32_e64 v0, s0, 16
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: v_add_f32_e64 v0, s4, 16
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %y = fadd float %x, 0x36e0000000000000
 store float %y, float addrspace(1)* %out
@@ -831,26 +831,26 @@
 define amdgpu_kernel void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) {
 ; SI-LABEL: add_inline_imm_neg_1_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_add_i32 s0, s0, -1
-; SI-NEXT: v_mov_b32_e32 v0, s0
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_add_i32 s4, s4, -1
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: add_inline_imm_neg_1_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: s_add_i32 s0, s0, -1
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: s_add_i32 s4, s4, -1
+; VI-NEXT: v_mov_b32_e32 v0, s4
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %xbc = bitcast float %x to i32
 %y = add i32 %xbc, -1
@@ -862,26 +862,26 @@
 define amdgpu_kernel void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) {
 ; SI-LABEL: add_inline_imm_neg_2_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_add_i32 s0, s0, -2
-; SI-NEXT: v_mov_b32_e32 v0, s0
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_add_i32 s4, s4, -2
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: add_inline_imm_neg_2_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: s_add_i32 s0, s0, -2
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: s_add_i32 s4, s4, -2
+; VI-NEXT: v_mov_b32_e32 v0, s4
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %xbc = bitcast float %x to i32
 %y = add i32 %xbc, -2
@@ -893,26 +893,26 @@
 define amdgpu_kernel void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) {
 ; SI-LABEL: add_inline_imm_neg_16_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_add_i32 s0, s0, -16
-; SI-NEXT: v_mov_b32_e32 v0, s0
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_add_i32 s4, s4, -16
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: add_inline_imm_neg_16_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: s_add_i32 s0, s0, -16
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: s_add_i32 s4, s4, -16
+; VI-NEXT: v_mov_b32_e32 v0, s4
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %xbc = bitcast float %x to i32
 %y = add i32 %xbc, -16
@@ -924,24 +924,24 @@
 define amdgpu_kernel void @add_inline_imm_63_f32(float addrspace(1)* %out, float %x) {
 ; SI-LABEL: add_inline_imm_63_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_add_f32_e64 v0, s0, 63
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: v_add_f32_e64 v0, s4, 63
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: add_inline_imm_63_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_add_f32_e64 v0, s0, 63
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: v_add_f32_e64 v0, s4, 63
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %y = fadd float %x, 0x36ff800000000000
 store float %y, float addrspace(1)* %out
@@ -951,24 +951,24 @@
 define amdgpu_kernel void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
 ; SI-LABEL: add_inline_imm_64_f32:
 ; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_add_f32_e64 v0, s0, 64
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: v_add_f32_e64 v0, s4, 64
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: add_inline_imm_64_f32:
 ; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_add_f32_e64 v0, s0, 64
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: v_add_f32_e64 v0, s4, 64
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %y = fadd float %x, 0x3700000000000000
 store float %y, float addrspace(1)* %out