236
|
1 // RUN: %clang_cc1 -no-opaque-pointers %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=ALL,X86 %s
|
|
2 // RUN: %clang_cc1 -no-opaque-pointers %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN %s
|
|
3 // RUN: %clang_cc1 -no-opaque-pointers %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN,AMDGCN20 %s
|
|
4 // RUN: %clang_cc1 -no-opaque-pointers %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s
|
|
5 // RUN: %clang_cc1 -no-opaque-pointers %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn -cl-ext=+__opencl_c_program_scope_global_variables | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN,AMDGCN20 %s
|
|
6 // RUN: %clang_cc1 -no-opaque-pointers %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN %s
|
150
|
7
|
|
// Two-lane int vector declared with the ext_vector_type attribute. It is the
// element type of every struct member below and of the (int2)(0, 0) stores.
8 typedef int int2 __attribute__((ext_vector_type(2)));
|
|
9
|
|
// Aggregate of 9 ints, used as the by-value parameter of foo(). The directives
// on foo expect amdgcn to receive it coerced to [9 x i32] and the x86 run to
// receive it as a byval pointer.
10 typedef struct {
|
|
11 int cells[9];
|
|
12 } Mat3X3;
|
|
13
|
|
// Aggregate of 16 ints, used as the return type of foo(). The directives on
// foo expect amdgcn to return it directly as %struct.Mat4X4 and the x86 run
// to return it through an sret pointer.
14 typedef struct {
|
|
15 int cells[16];
|
|
16 } Mat4X4;
|
|
17
|
|
// Aggregate of 1024 ints, used as the by-value parameter of foo_large(). The
// directives on foo_large expect it to be passed as a byval pointer on both
// checked targets (addrspace(5) on amdgcn).
18 typedef struct {
|
|
19 int cells[1024];
|
|
20 } Mat32X32;
|
|
21
|
|
// Aggregate of 4096 ints, used as the return type of foo_large(). The
// directives on foo_large expect it to be returned through an sret pointer on
// both checked targets.
22 typedef struct {
|
|
23 int cells[4096];
|
|
24 } Mat64X64;
|
|
25
|
|
// Struct with a single int2 member. The amdgcn directive on FuncOneMember
// expects it to be passed as one <2 x i32> value.
26 struct StructOneMember {
|
|
27 int2 x;
|
|
28 };
|
|
29
|
|
// Struct with two int2 members. The amdgcn directive on FuncTwoMember expects
// it to be passed as two separate <2 x i32> values.
30 struct StructTwoMember {
|
|
31 int2 x;
|
|
32 int2 y;
|
|
33 };
|
|
34
|
|
// Struct holding one array of 100 int2 elements. The memcpy directives in the
// test_indirect_arg_* cases copy it with a length of 800 bytes, and the
// FuncOneLargeMember directive expects it to be passed byval.
35 struct LargeStructOneMember {
|
|
36 int2 x[100];
|
|
37 };
|
|
38
|
|
// Struct holding two int2 arrays (40 and 20 elements). The amdgcn directive
// on FuncLargeTwoMember expects it to be passed byval through an
// addrspace(5) pointer.
39 struct LargeStructTwoMember {
|
|
40 int2 x[40];
|
|
41 int2 y[20];
|
|
42 };
|
|
43
|
223
|
// Program-scope variable consumed by test_indirect_arg_globl(). It is only
// declared for OpenCL C 2.0, or for 3.0 and later when the
// __opencl_c_program_scope_global_variables feature macro is defined; the
// function that uses it sits behind the same condition.
44 #if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
|
150
|
45 struct LargeStructOneMember g_s;
|
|
46 #endif
|
|
47
|
236
|
48 // X86-LABEL: define{{.*}} void @foo(%struct.Mat4X4* noalias sret(%struct.Mat4X4) align 4 %agg.result, %struct.Mat3X3* noundef byval(%struct.Mat3X3) align 4 %in)
|
221
|
49 // AMDGCN-LABEL: define{{.*}} %struct.Mat4X4 @foo([9 x i32] %in.coerce)
|
150
|
// Takes a Mat3X3 by value and returns a Mat4X4 by value; marked noinline.
// `in` is unused and `out` is returned without being initialized. The
// directives above match only the emitted function signature, so the contents
// are presumably irrelevant to the test.
50 Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
|
|
51 Mat4X4 out;
|
|
52 return out;
|
|
53 }
|
|
54
|
173
|
55 // ALL-LABEL: define {{.*}} void @ker
|
150
|
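56 // On X86, expect two mem copies: one for the argument "in", and one for
|
|
57 // the return value. On AMDGCN the argument is instead loaded as a
// [9 x i32] value, so only the return value is copied with memcpy.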
|
|
58 // X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
|
|
59 // X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
|
|
60
|
|
61 // AMDGCN: load [9 x i32], [9 x i32] addrspace(1)*
|
|
62 // AMDGCN: call %struct.Mat4X4 @foo([9 x i32]
|
|
63 // AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
|
|
// Kernel that passes element 1 of the global `in` buffer to foo() by value
// and stores the returned struct into element 0 of the global `out` buffer.
// This exercises the caller side of the small-struct argument and return.
64 kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
|
|
65 out[0] = foo(in[1]);
|
|
66 }
|
|
67
|
236
|
68 // X86-LABEL: define{{.*}} void @foo_large(%struct.Mat64X64* noalias sret(%struct.Mat64X64) align 4 %agg.result, %struct.Mat32X32* noundef byval(%struct.Mat32X32) align 4 %in)
|
|
69 // AMDGCN-LABEL: define{{.*}} void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret(%struct.Mat64X64) align 4 %agg.result, %struct.Mat32X32 addrspace(5)* noundef byval(%struct.Mat32X32) align 4 %in)
|
150
|
// Large-struct counterpart of foo(): takes a Mat32X32 by value and returns a
// Mat64X64 by value; marked noinline. `in` is unused and `out` is returned
// without being initialized. The directives above match only the emitted
// signature (sret result, byval argument).
70 Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
|
|
71 Mat64X64 out;
|
|
72 return out;
|
|
73 }
|
|
74
|
173
|
75 // ALL-LABEL: define {{.*}} void @ker_large
|
150
|
76 // Expect two mem copies: one for the argument "in", and one for
|
|
77 // the return value.
|
|
78 // X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
|
|
79 // X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
|
|
80 // AMDGCN: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)*
|
|
81 // AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
|
|
// Kernel that passes element 1 of the global `in` buffer to foo_large() by
// value and stores the returned struct into element 0 of the global `out`
// buffer. This exercises the caller side of the byval argument and the sret
// return.
82 kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
|
|
83 out[0] = foo_large(in[1]);
|
|
84 }
|
|
85
|
221
|
86 // AMDGCN-LABEL: define{{.*}} void @FuncOneMember(<2 x i32> %u.coerce)
|
150
|
// Receives a StructOneMember by value and overwrites its only member with a
// zero vector. `u` is a by-value parameter, so the store only touches the
// callee's copy.
87 void FuncOneMember(struct StructOneMember u) {
|
|
88 u.x = (int2)(0, 0);
|
|
89 }
|
|
90
|
236
|
91 // AMDGCN-LABEL: define{{.*}} void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* noundef byval(%struct.LargeStructOneMember) align 8 %u)
|
150
|
92 // AMDGCN-NOT: addrspacecast
|
|
93 // AMDGCN: store <2 x i32> %{{.*}}, <2 x i32> addrspace(5)*
|
|
// Receives a LargeStructOneMember by value and writes a zero vector to the
// first array element. The directives above expect that store to go straight
// to an addrspace(5) pointer with no addrspacecast in between.
94 void FuncOneLargeMember(struct LargeStructOneMember u) {
|
|
95 u.x[0] = (int2)(0, 0);
|
|
96 }
|
|
97
|
221
|
98 // AMDGCN20-LABEL: define{{.*}} void @test_indirect_arg_globl()
|
150
|
99 // AMDGCN20: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
|
|
100 // AMDGCN20: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
|
|
101 // AMDGCN20: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(1)* align 8 bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i1 false)
|
236
|
102 // AMDGCN20: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* noundef byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]])
|
223
|
// Passes the program-scope variable g_s by value to FuncOneLargeMember().
// The directives above expect the caller to copy 800 bytes from @g_s in
// addrspace(1) into an addrspace(5) temporary and pass that temporary byval.
// The function is compiled only when g_s itself is declared (same condition).
103 #if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
|
150
|
104 void test_indirect_arg_globl(void) {
|
|
105 FuncOneLargeMember(g_s);
|
|
106 }
|
|
107 #endif
|
|
108
|
221
|
109 // AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @test_indirect_arg_local()
|
150
|
110 // AMDGCN: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
|
|
111 // AMDGCN: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
|
|
112 // AMDGCN: call void @llvm.memcpy.p5i8.p3i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(3)* align 8 bitcast (%struct.LargeStructOneMember addrspace(3)* @test_indirect_arg_local.l_s to i8 addrspace(3)*), i64 800, i1 false)
|
236
|
113 // AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* noundef byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]])
|
150
|
// Kernel that passes a `local` variable by value to FuncOneLargeMember().
// The directives above expect the caller to copy 800 bytes from
// @test_indirect_arg_local.l_s in addrspace(3) into an addrspace(5) temporary
// and pass that temporary byval. The name l_s is part of the matched symbol.
114 kernel void test_indirect_arg_local(void) {
|
|
115 local struct LargeStructOneMember l_s;
|
|
116 FuncOneLargeMember(l_s);
|
|
117 }
|
|
118
|
221
|
119 // AMDGCN-LABEL: define{{.*}} void @test_indirect_arg_private()
|
150
|
120 // AMDGCN: %[[p_s:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
|
|
121 // AMDGCN-NOT: @llvm.memcpy
|
236
|
122 // AMDGCN-NEXT: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* noundef byval(%struct.LargeStructOneMember) align 8 %[[p_s]])
|
150
|
// Passes a function-local variable by value to FuncOneLargeMember(). The
// directives above expect no memcpy here: the addrspace(5) alloca for p_s is
// passed as the byval argument directly. p_s is never initialized, and the
// directives do not inspect its contents.
123 void test_indirect_arg_private(void) {
|
|
124 struct LargeStructOneMember p_s;
|
|
125 FuncOneLargeMember(p_s);
|
|
126 }
|
|
127
|
221
|
128 // AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelOneMember
|
150
|
129 // AMDGCN-SAME: (<2 x i32> %[[u_coerce:.*]])
|
|
130 // AMDGCN: %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
|
|
131 // AMDGCN: %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, %struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0
|
|
132 // AMDGCN: store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* %[[coerce_dive]]
|
|
133 // AMDGCN: call void @FuncOneMember(<2 x i32>
|
|
// Kernel taking a StructOneMember by value and forwarding it to
// FuncOneMember(). The directives above expect the kernel argument to arrive
// as a <2 x i32>, be stored into an addrspace(5) alloca, and then be passed
// on as a <2 x i32> value.
134 kernel void KernelOneMember(struct StructOneMember u) {
|
|
135 FuncOneMember(u);
|
|
136 }
|
|
137
|
|
138 // SPIR: call void @llvm.memcpy.p0i8.p1i8.i32
|
|
139 // SPIR-NOT: addrspacecast
|
|
// Kernel that dereferences a global pointer and passes the struct by value
// to FuncOneMember(). The SPIR directives above expect a memcpy from
// addrspace(1) to addrspace(0) and no addrspacecast.
// NOTE(review): the visible source has no label directive for the SPIR
// prefix, so those two directives are not anchored to this kernel and may
// match output from an earlier function. Confirm whether that is intended.
140 kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
|
|
141 FuncOneMember(*u);
|
|
142 }
|
|
143
|
221
|
144 // AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelLargeOneMember(
|
150
|
145 // AMDGCN: %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
|
|
146 // AMDGCN: store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8
|
236
|
147 // AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* noundef byval(%struct.LargeStructOneMember) align 8 %[[U]])
|
150
|
// Kernel taking a LargeStructOneMember by value and forwarding it to
// FuncOneLargeMember(). The directives above expect the incoming %u.coerce
// value to be stored into an addrspace(5) alloca, and that alloca to be
// passed as the byval argument.
148 kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
|
|
149 FuncOneLargeMember(u);
|
|
150 }
|
|
151
|
221
|
152 // AMDGCN-LABEL: define{{.*}} void @FuncTwoMember(<2 x i32> %u.coerce0, <2 x i32> %u.coerce1)
|
150
|
// Receives a StructTwoMember by value and overwrites its second member with
// a zero vector. `u` is a by-value parameter, so the store only touches the
// callee's copy.
153 void FuncTwoMember(struct StructTwoMember u) {
|
|
154 u.y = (int2)(0, 0);
|
|
155 }
|
|
156
|
236
|
157 // AMDGCN-LABEL: define{{.*}} void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* noundef byval(%struct.LargeStructTwoMember) align 8 %u)
|
150
|
// Receives a LargeStructTwoMember by value and writes a zero vector to the
// first element of its second array. The directive above matches only the
// emitted signature (byval addrspace(5) pointer).
158 void FuncLargeTwoMember(struct LargeStructTwoMember u) {
|
|
159 u.y[0] = (int2)(0, 0);
|
|
160 }
|
|
161
|
221
|
162 // AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelTwoMember
|
150
|
163 // AMDGCN-SAME: (%struct.StructTwoMember %[[u_coerce:.*]])
|
|
164 // AMDGCN: %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
|
|
165 // AMDGCN: %[[LD0:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
|
|
166 // AMDGCN: %[[LD1:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
|
|
167 // AMDGCN: call void @FuncTwoMember(<2 x i32> %[[LD0]], <2 x i32> %[[LD1]])
|
|
// Kernel taking a StructTwoMember by value and forwarding it to
// FuncTwoMember(). The directives above expect the kernel to receive the
// whole struct as one argument, then load the two <2 x i32> members from an
// addrspace(5) alloca and pass them as two separate values.
168 kernel void KernelTwoMember(struct StructTwoMember u) {
|
|
169 FuncTwoMember(u);
|
|
170 }
|
|
171
|
221
|
172 // AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelLargeTwoMember
|
150
|
173 // AMDGCN-SAME: (%struct.LargeStructTwoMember %[[u_coerce:.*]])
|
|
174 // AMDGCN: %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5)
|
|
175 // AMDGCN: store %struct.LargeStructTwoMember %[[u_coerce]], %struct.LargeStructTwoMember addrspace(5)* %[[u]]
|
236
|
176 // AMDGCN: call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* noundef byval(%struct.LargeStructTwoMember) align 8 %[[u]])
|
150
|
// Kernel taking a LargeStructTwoMember by value and forwarding it to
// FuncLargeTwoMember(). The directives above expect the incoming struct value
// to be stored into an addrspace(5) alloca, and that alloca to be passed as
// the byval argument.
177 kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
|
|
178 FuncLargeTwoMember(u);
|
|
179 }
|