diff llvm/test/CodeGen/AMDGPU/lds-alignment.ll @ 221:79ff65ed7e25
LLVM12 Original
author   | Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date     | Tue, 15 Jun 2021 19:15:29 +0900
parents  | 1d019706d866
children | 1f2b6ac9f198
--- a/llvm/test/CodeGen/AMDGPU/lds-alignment.ll	Tue Jun 15 19:13:43 2021 +0900
+++ b/llvm/test/CodeGen/AMDGPU/lds-alignment.ll	Tue Jun 15 19:15:29 2021 +0900
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefix=HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 < %s | FileCheck -check-prefix=HSA %s
 
 @lds.align16.0 = internal unnamed_addr addrspace(3) global [38 x i8] undef, align 16
 @lds.align16.1 = internal unnamed_addr addrspace(3) global [38 x i8] undef, align 16
@@ -46,7 +46,7 @@
   ret void
 }
 
-; 38 + (10 pad) + 38
+; 38 + (10 pad) + 38 (= 86)
 ; HSA-LABEL: {{^}}test_round_size_2_align_8:
 ; HSA: workgroup_group_segment_byte_size = 86
 ; HSA: group_segment_alignment = 4
@@ -94,9 +94,9 @@
   ret void
 }
 
-; (7 * 8) + (39 * 4) = 212
+; (39 * 4) + (4 pad) + (7 * 8) = 216
 ; HSA-LABEL: {{^}}test_missing_alignment_size_2_order0:
-; HSA: workgroup_group_segment_byte_size = 212
+; HSA: workgroup_group_segment_byte_size = 216
 ; HSA: group_segment_alignment = 4
 define amdgpu_kernel void @test_missing_alignment_size_2_order0(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
   %lds.missing.align.0.bc = bitcast [39 x i32] addrspace(3)* @lds.missing.align.0 to i8 addrspace(3)*
@@ -125,20 +125,9 @@
   ret void
 }
 
-; Test how the size needed for padding changes based on when the
-; global is encountered during lowering. There should be a consistent
-; order to minimize padding waste.
-;
-; The way global addresses are lowered now, this is in inverse of
-; first use order which isn't great.
-;
-; This should be the optimal order for these globals. If sorted to
-; minimize padding, the minimum possible size is: align 32, align 8,
-; align 16
-
 
-; align 32, 16, 8
-; 38 + (10 pad) + 38 + (10 pad) + 38 = 134
+; align 32, 16, 16
+; 38 + (10 pad) + 38 + (10 pad) + 38 ( = 134)
 ; HSA-LABEL: {{^}}test_round_size_3_order0:
 ; HSA: workgroup_group_segment_byte_size = 134
 ; HSA: group_segment_alignment = 4
@@ -158,8 +147,8 @@
   ret void
 }
 
-; align 32, 8, 16
-; 38 (+ 2 pad) + 38 + (18 pad) + 38 = 134
+; align 32, 16, 16
+; 38 (+ 10 pad) + 38 + (10 pad) + 38 ( = 134)
 ; HSA-LABEL: {{^}}test_round_size_3_order1:
 ; HSA: workgroup_group_segment_byte_size = 134
 ; HSA: group_segment_alignment = 4
@@ -179,10 +168,10 @@
   ret void
 }
 
-; align 16, 32, 8
-; 38 + (26 pad) + 38 + (10 pad) + 38 = 150
+; align 32, 16, 16
+; 38 + (10 pad) + 38 + (10 pad) + 38 ( = 126)
 ; HSA-LABEL: {{^}}test_round_size_3_order2:
-; HSA: workgroup_group_segment_byte_size = 150
+; HSA: workgroup_group_segment_byte_size = 134
 ; HSA: group_segment_alignment = 4
 define amdgpu_kernel void @test_round_size_3_order2(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
   %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
@@ -200,10 +189,10 @@
   ret void
 }
 
-; align 16, 8, 32
-; 38 + (2 pad) + 38 + (2 pad) + 38
+; align 32, 16, 16
+; 38 + (10 pad) + 38 + (10 pad) + 38 ( = 134)
 ; HSA-LABEL: {{^}}test_round_size_3_order3:
-; HSA: workgroup_group_segment_byte_size = 118
+; HSA: workgroup_group_segment_byte_size = 134
 ; HSA: group_segment_alignment = 4
 define amdgpu_kernel void @test_round_size_3_order3(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
   %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
@@ -221,10 +210,10 @@
   ret void
 }
 
-; align 8, 32, 16
-; 38 + (26 pad) + 38 + (2 pad) + 38 = 142
+; align 32, 16, 16
+; 38 + (10 pad) + 38 + (10 pad) + 38 (= 134)
 ; HSA-LABEL: {{^}}test_round_size_3_order4:
-; HSA: workgroup_group_segment_byte_size = 142
+; HSA: workgroup_group_segment_byte_size = 134
 ; HSA: group_segment_alignment = 4
 define amdgpu_kernel void @test_round_size_3_order4(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
   %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)*
@@ -242,10 +231,10 @@
   ret void
 }
 
-; align 8, 16, 32
-; 38 + (10 pad) + 38 + (2 pad) + 38 = 126
+; align 32, 16, 16
+; 38 + (10 pad) + 38 + (10 pad) + 38 (= 134)
 ; HSA-LABEL: {{^}}test_round_size_3_order5:
-; HSA: workgroup_group_segment_byte_size = 126
+; HSA: workgroup_group_segment_byte_size = 134
 ; HSA: group_segment_alignment = 4
 define amdgpu_kernel void @test_round_size_3_order5(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
   %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)*
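
The updated CHECK values above all follow one allocation pattern: each LDS global is placed at the next offset aligned to its effective alignment, under-aligned globals appear to be rounded up (the align-8 [38 x i8] arrays behave as align 16, hence the "align 32, 16, 16" comments), and globals appear to be laid out from largest effective alignment down, so the use order inside the kernels no longer matters. The following is a minimal Python sketch of that arithmetic; the round-up rule and layout order are assumptions inferred from this test's numbers, not code taken from the AMDGPU backend, and align_to/segment_size are hypothetical helper names.

```python
def align_to(offset: int, align: int) -> int:
    """Round offset up to the next multiple of align."""
    return (offset + align - 1) // align * align

def segment_size(lds_globals):
    """lds_globals: (size_in_bytes, declared_align) pairs in use order."""
    # Assumed round-up rule: globals larger than 8 bytes get at least
    # align 16 (what turns "align 32, 8, 16" into "align 32, 16, 16").
    effective = [(size, max(align, 16)) if size > 8 else (size, align)
                 for size, align in lds_globals]
    # Assumed layout order: descending effective alignment; Python's
    # sort is stable, so equal alignments keep their original use order.
    effective.sort(key=lambda g: g[1], reverse=True)
    offset = 0
    for size, align in effective:
        offset = align_to(offset, align) + size  # pad up, place global
    return offset

# test_round_size_2_align_8: 38 + (10 pad) + 38 (= 86)
assert segment_size([(38, 16), (38, 8)]) == 86

# test_round_size_3_order0..order5: every use order of the align
# 16/8/32 globals lays out as 38 + (10 pad) + 38 + (10 pad) + 38 = 134
assert segment_size([(38, 16), (38, 8), (38, 32)]) == 134
assert segment_size([(38, 8), (38, 16), (38, 32)]) == 134

# test_missing_alignment_size_2_order0: [39 x i32] (ABI align 4) then
# [7 x i64] (ABI align 8): (39 * 4) + (4 pad) + (7 * 8) = 216
assert segment_size([(39 * 4, 4), (7 * 8, 8)]) == 216
```

Under these assumed rules every order0..order5 kernel lands on 134, which also suggests the "( = 126)" note in the order2 hunk is a typo for 134, the value its own sum and CHECK line give.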