diff llvm/test/CodeGen/AMDGPU/lds-alignment.ll @ 221:79ff65ed7e25

LLVM12 Original
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Tue, 15 Jun 2021 19:15:29 +0900
parents 1d019706d866
children 1f2b6ac9f198
--- a/llvm/test/CodeGen/AMDGPU/lds-alignment.ll	Tue Jun 15 19:13:43 2021 +0900
+++ b/llvm/test/CodeGen/AMDGPU/lds-alignment.ll	Tue Jun 15 19:15:29 2021 +0900
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefix=HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 < %s | FileCheck -check-prefix=HSA %s
 
 @lds.align16.0 = internal unnamed_addr addrspace(3) global [38 x i8] undef, align 16
 @lds.align16.1 = internal unnamed_addr addrspace(3) global [38 x i8] undef, align 16
@@ -46,7 +46,7 @@
   ret void
 }
 
-; 38 + (10 pad) + 38
+; 38 + (10 pad) + 38 (= 86)
 ; HSA-LABEL: {{^}}test_round_size_2_align_8:
 ; HSA: workgroup_group_segment_byte_size = 86
 ; HSA: group_segment_alignment = 4
@@ -94,9 +94,9 @@
   ret void
 }
 
-; (7 * 8) + (39 * 4) = 212
+; (39 * 4) + (4 pad) + (7 * 8) = 216
 ; HSA-LABEL: {{^}}test_missing_alignment_size_2_order0:
-; HSA: workgroup_group_segment_byte_size = 212
+; HSA: workgroup_group_segment_byte_size = 216
 ; HSA: group_segment_alignment = 4
 define amdgpu_kernel void @test_missing_alignment_size_2_order0(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
   %lds.missing.align.0.bc = bitcast [39 x i32] addrspace(3)* @lds.missing.align.0 to i8 addrspace(3)*
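[note] The corrected byte counts in the two hunks above fall out of a plain align-and-append layout. Here is a minimal standalone C++ sketch of that arithmetic; the local `alignTo` helper mirrors `llvm::alignTo` from `llvm/Support/MathExtras.h` but is redefined so the snippet compiles on its own, and the per-element alignments (4 for `i32`, 8 for `i64`) are inferred from the updated comment, since the `@lds.missing.align.*` globals carry no explicit `align`:

```cpp
#include <cstdint>
#include <cstdio>

// Round Offset up to the next multiple of Align (Align must be a power of two).
static uint64_t alignTo(uint64_t Offset, uint64_t Align) {
  return (Offset + Align - 1) & ~(Align - 1);
}

int main() {
  // test_missing_alignment_size_2_order0:
  // [39 x i32] (ABI align 4), then [7 x i64] (ABI align 8).
  uint64_t Off = 0;
  Off = alignTo(Off, 4) + 39 * 4; // 156 bytes of i32 data
  Off = alignTo(Off, 8) + 7 * 8;  // 4 bytes of pad, then 56 bytes of i64 data
  printf("size = %llu\n", (unsigned long long)Off); // 216, matching the CHECK
  return 0;
}
```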
@@ -125,20 +125,9 @@
 
   ret void
 }
-; Test how the size needed for padding changes based on when the
-; global is encountered during lowering. There should be a consistent
-; order to minimize padding waste.
-;
-; The way global addresses are lowered now, this is in inverse of
-; first use order which isn't great.
-;
-; This should be the optimal order for these globals. If sorted to
-; minimize padding, the minimum possible size is: align 32, align 8,
-; align 16
 
-
-; align 32, 16, 8
-; 38 + (10 pad) + 38 + (10 pad) + 38 = 134
+; align 32, 16, 16
+; 38 + (10 pad) + 38 + (10 pad) + 38 (= 134)
 ; HSA-LABEL: {{^}}test_round_size_3_order0:
 ; HSA: workgroup_group_segment_byte_size = 134
 ; HSA: group_segment_alignment = 4
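[note] Every `test_round_size_3_order*` kernel from here on expects the same 134-byte segment: the updated comments describe a fixed "align 32, 16, 16" layout independent of the order the globals are used in. A hedged sketch of why a decreasing-alignment layout is order-invariant; the sort criterion and the rounding of the align-8 global up to 16 are inferred from the updated comments, not lifted from the actual AMDGPU lowering pass:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct LDSVar { uint64_t Size, Align; };

static uint64_t alignTo(uint64_t Offset, uint64_t Align) {
  return (Offset + Align - 1) & ~(Align - 1);
}

// Lay the variables out at decreasing alignment; any permutation of the
// input produces the same total once sorted.
static uint64_t segmentSize(std::vector<LDSVar> Vars) {
  std::sort(Vars.begin(), Vars.end(),
            [](const LDSVar &A, const LDSVar &B) { return A.Align > B.Align; });
  uint64_t Off = 0;
  for (const LDSVar &V : Vars)
    Off = alignTo(Off, V.Align) + V.Size;
  return Off;
}

int main() {
  // Three 38-byte arrays; the align-8 one is treated as align 16 per the comments.
  printf("%llu\n", (unsigned long long)segmentSize({{38, 32}, {38, 16}, {38, 16}}));
  // 38 + (10 pad) + 38 + (10 pad) + 38 = 134 for every source order.
  return 0;
}
```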
@@ -158,8 +147,8 @@
   ret void
 }
 
-; align 32, 8, 16
-; 38 (+ 2 pad) + 38 + (18 pad) + 38 = 134
+; align 32, 16, 16
+; 38 + (10 pad) + 38 + (10 pad) + 38 (= 134)
 ; HSA-LABEL: {{^}}test_round_size_3_order1:
 ; HSA: workgroup_group_segment_byte_size = 134
 ; HSA: group_segment_alignment = 4
@@ -179,10 +168,10 @@
   ret void
 }
 
-; align 16, 32, 8
-; 38 + (26 pad) + 38 + (10 pad) + 38 = 150
+; align 32, 16, 16
+; 38 + (10 pad) + 38 + (10 pad) + 38 (= 134)
 ; HSA-LABEL: {{^}}test_round_size_3_order2:
-; HSA: workgroup_group_segment_byte_size = 150
+; HSA: workgroup_group_segment_byte_size = 134
 ; HSA: group_segment_alignment = 4
 define amdgpu_kernel void @test_round_size_3_order2(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
   %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
@@ -200,10 +189,10 @@
   ret void
 }
 
-; align 16, 8, 32
-; 38 + (2 pad) + 38 + (2 pad) + 38
+; align 32, 16, 16
+; 38 + (10 pad) + 38 + (10 pad) + 38 (= 134)
 ; HSA-LABEL: {{^}}test_round_size_3_order3:
-; HSA: workgroup_group_segment_byte_size = 118
+; HSA: workgroup_group_segment_byte_size = 134
 ; HSA: group_segment_alignment = 4
 define amdgpu_kernel void @test_round_size_3_order3(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
   %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
@@ -221,10 +210,10 @@
   ret void
 }
 
-; align 8, 32, 16
-; 38 + (26 pad) + 38 + (2 pad) + 38 = 142
+; align 32, 16, 16
+; 38 + (10 pad) + 38 + (10 pad) + 38 (= 134)
 ; HSA-LABEL: {{^}}test_round_size_3_order4:
-; HSA: workgroup_group_segment_byte_size = 142
+; HSA: workgroup_group_segment_byte_size = 134
 ; HSA: group_segment_alignment = 4
 define amdgpu_kernel void @test_round_size_3_order4(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
   %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)*
@@ -242,10 +231,10 @@
   ret void
 }
 
-; align 8, 16, 32
-; 38 + (10 pad) + 38 + (2 pad) + 38 = 126
+; align 32, 16, 16
+; 38 + (10 pad) + 38 + (10 pad) + 38 (= 134)
 ; HSA-LABEL: {{^}}test_round_size_3_order5:
-; HSA: workgroup_group_segment_byte_size = 126
+; HSA: workgroup_group_segment_byte_size = 134
 ; HSA: group_segment_alignment = 4
 define amdgpu_kernel void @test_round_size_3_order5(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
   %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)*