comparison llvm/test/CodeGen/AMDGPU/ds_write2.ll @ 223:5f17cb93ff66 llvm-original

LLVM13 (2021/7/18)
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sun, 18 Jul 2021 22:43:00 +0900
parents 79ff65ed7e25
children c4bab56944e8
comparison
equal deleted inserted replaced
222:81f6424ef0e3 223:5f17cb93ff66
772 @foo = addrspace(3) global [4 x i32] undef, align 4 772 @foo = addrspace(3) global [4 x i32] undef, align 4
773 773
774 define amdgpu_kernel void @store_constant_adjacent_offsets() { 774 define amdgpu_kernel void @store_constant_adjacent_offsets() {
775 ; CI-LABEL: store_constant_adjacent_offsets: 775 ; CI-LABEL: store_constant_adjacent_offsets:
776 ; CI: ; %bb.0: 776 ; CI: ; %bb.0:
777 ; CI-NEXT: s_movk_i32 s0, 0x7b 777 ; CI-NEXT: v_mov_b32_e32 v0, 0x7b
778 ; CI-NEXT: v_mov_b32_e32 v0, 0 778 ; CI-NEXT: v_mov_b32_e32 v1, v0
779 ; CI-NEXT: v_mov_b32_e32 v1, s0 779 ; CI-NEXT: v_mov_b32_e32 v2, 0
780 ; CI-NEXT: v_mov_b32_e32 v2, s0 780 ; CI-NEXT: s_mov_b32 m0, -1
781 ; CI-NEXT: s_mov_b32 m0, -1 781 ; CI-NEXT: ds_write_b64 v2, v[0:1]
782 ; CI-NEXT: ds_write2_b32 v0, v1, v2 offset1:1
783 ; CI-NEXT: s_endpgm 782 ; CI-NEXT: s_endpgm
784 ; 783 ;
785 ; GFX9-LABEL: store_constant_adjacent_offsets: 784 ; GFX9-LABEL: store_constant_adjacent_offsets:
786 ; GFX9: ; %bb.0: 785 ; GFX9: ; %bb.0:
787 ; GFX9-NEXT: s_movk_i32 s0, 0x7b 786 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
788 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 787 ; GFX9-NEXT: v_mov_b32_e32 v1, v0
789 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 788 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
790 ; GFX9-NEXT: v_mov_b32_e32 v2, s0 789 ; GFX9-NEXT: ds_write_b64 v2, v[0:1]
791 ; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:1
792 ; GFX9-NEXT: s_endpgm 790 ; GFX9-NEXT: s_endpgm
793 store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 791 store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
794 store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4 792 store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
795 ret void 793 ret void
796 } 794 }
818 @bar = addrspace(3) global [4 x i64] undef, align 4 816 @bar = addrspace(3) global [4 x i64] undef, align 4
819 817
820 define amdgpu_kernel void @store_misaligned64_constant_offsets() { 818 define amdgpu_kernel void @store_misaligned64_constant_offsets() {
821 ; CI-LABEL: store_misaligned64_constant_offsets: 819 ; CI-LABEL: store_misaligned64_constant_offsets:
822 ; CI: ; %bb.0: 820 ; CI: ; %bb.0:
823 ; CI-NEXT: s_movk_i32 s0, 0x7b 821 ; CI-NEXT: v_mov_b32_e32 v0, 0x7b
824 ; CI-NEXT: s_mov_b32 s1, 0 822 ; CI-NEXT: v_mov_b32_e32 v1, 0
823 ; CI-NEXT: v_mov_b32_e32 v2, v0
824 ; CI-NEXT: v_mov_b32_e32 v3, v1
825 ; CI-NEXT: s_mov_b32 m0, -1
826 ; CI-NEXT: ds_write_b128 v1, v[0:3]
827 ; CI-NEXT: s_endpgm
828 ;
829 ; GFX9-LABEL: store_misaligned64_constant_offsets:
830 ; GFX9: ; %bb.0:
831 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
832 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
833 ; GFX9-NEXT: v_mov_b32_e32 v2, v0
834 ; GFX9-NEXT: v_mov_b32_e32 v3, v1
835 ; GFX9-NEXT: ds_write_b128 v1, v[0:3]
836 ; GFX9-NEXT: s_endpgm
837 store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
838 store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
839 ret void
840 }
841
842 @bar.large = addrspace(3) global [4096 x i64] undef, align 4
843
844 define amdgpu_kernel void @store_misaligned64_constant_large_offsets() {
845 ; CI-LABEL: store_misaligned64_constant_large_offsets:
846 ; CI: ; %bb.0:
847 ; CI-NEXT: s_mov_b64 s[0:1], 0x7b
825 ; CI-NEXT: v_mov_b32_e32 v0, s0 848 ; CI-NEXT: v_mov_b32_e32 v0, s0
826 ; CI-NEXT: v_mov_b32_e32 v2, 0 849 ; CI-NEXT: v_mov_b32_e32 v2, 0
827 ; CI-NEXT: v_mov_b32_e32 v1, s1 850 ; CI-NEXT: v_mov_b32_e32 v1, s1
828 ; CI-NEXT: s_mov_b32 m0, -1 851 ; CI-NEXT: s_mov_b32 m0, -1
829 ; CI-NEXT: ds_write2_b64 v2, v[0:1], v[0:1] offset1:1
830 ; CI-NEXT: s_endpgm
831 ;
832 ; GFX9-ALIGNED-LABEL: store_misaligned64_constant_offsets:
833 ; GFX9-ALIGNED: ; %bb.0:
834 ; GFX9-ALIGNED-NEXT: s_movk_i32 s0, 0x7b
835 ; GFX9-ALIGNED-NEXT: s_mov_b32 s1, 0
836 ; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v0, s0
837 ; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v2, 0
838 ; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v1, s1
839 ; GFX9-ALIGNED-NEXT: ds_write2_b64 v2, v[0:1], v[0:1] offset1:1
840 ; GFX9-ALIGNED-NEXT: s_endpgm
841 ;
842 ; GFX9-UNALIGNED-LABEL: store_misaligned64_constant_offsets:
843 ; GFX9-UNALIGNED: ; %bb.0:
844 ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0x7b
845 ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v1, 0
846 ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v2, v0
847 ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v3, v1
848 ; GFX9-UNALIGNED-NEXT: ds_write_b128 v1, v[0:3]
849 ; GFX9-UNALIGNED-NEXT: s_endpgm
850 store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
851 store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
852 ret void
853 }
854
855 @bar.large = addrspace(3) global [4096 x i64] undef, align 4
856
857 define amdgpu_kernel void @store_misaligned64_constant_large_offsets() {
858 ; CI-LABEL: store_misaligned64_constant_large_offsets:
859 ; CI: ; %bb.0:
860 ; CI-NEXT: s_movk_i32 s0, 0x7b
861 ; CI-NEXT: s_mov_b32 s1, 0
862 ; CI-NEXT: v_mov_b32_e32 v0, s0
863 ; CI-NEXT: v_mov_b32_e32 v2, 0
864 ; CI-NEXT: v_mov_b32_e32 v1, s1
865 ; CI-NEXT: s_mov_b32 m0, -1
866 ; CI-NEXT: ds_write_b64 v2, v[0:1] offset:16384 852 ; CI-NEXT: ds_write_b64 v2, v[0:1] offset:16384
867 ; CI-NEXT: ds_write_b64 v2, v[0:1] offset:32760 853 ; CI-NEXT: ds_write_b64 v2, v[0:1] offset:32760
868 ; CI-NEXT: s_endpgm 854 ; CI-NEXT: s_endpgm
869 ; 855 ;
870 ; GFX9-LABEL: store_misaligned64_constant_large_offsets: 856 ; GFX9-LABEL: store_misaligned64_constant_large_offsets:
871 ; GFX9: ; %bb.0: 857 ; GFX9: ; %bb.0:
872 ; GFX9-NEXT: s_movk_i32 s0, 0x7b 858 ; GFX9-NEXT: s_mov_b64 s[0:1], 0x7b
873 ; GFX9-NEXT: s_mov_b32 s1, 0
874 ; GFX9-NEXT: v_mov_b32_e32 v0, s0 859 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
875 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 860 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
876 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 861 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
877 ; GFX9-NEXT: ds_write_b64 v2, v[0:1] offset:16384 862 ; GFX9-NEXT: ds_write_b64 v2, v[0:1] offset:16384
878 ; GFX9-NEXT: ds_write_b64 v2, v[0:1] offset:32760 863 ; GFX9-NEXT: ds_write_b64 v2, v[0:1] offset:32760