252
|
1 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
|
|
2 // RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++17 -fgpu-rdc \
|
|
3 // RUN: -emit-llvm -o - -x hip %s > %t.dev
|
|
4
|
|
5 // RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
|
|
6 // RUN: -aux-triple amdgcn-amd-amdhsa -std=c++17 -fgpu-rdc \
|
|
7 // RUN: -emit-llvm -o - -x hip %s > %t.host
|
|
8
|
|
9 // RUN: cat %t.dev %t.host | FileCheck -check-prefixes=HIP,COMMON %s
|
|
10 // RUN: cat %t.dev %t.host | FileCheck -check-prefixes=COMNEG %s
|
|
11
|
|
12 // RUN: echo "GPU binary" > %t.fatbin
|
|
13
|
|
14 // RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \
|
|
15 // RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++17 -fgpu-rdc \
|
|
16 // RUN: -emit-llvm -o - %s > %t.dev
|
|
17
|
|
18 // RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
|
|
19 // RUN: -aux-triple nvptx -std=c++17 -fgpu-rdc -fcuda-include-gpubinary %t.fatbin \
|
|
20 // RUN: -emit-llvm -o - %s > %t.host
|
|
21
|
|
22 // RUN: cat %t.dev %t.host | FileCheck -check-prefixes=CUDA,COMMON %s
|
|
23 // RUN: cat %t.dev %t.host | FileCheck -check-prefixes=COMNEG %s
|
|
24
|
|
25 #include "Inputs/cuda.h"
|
|
26
|
|
27 // HIP-DAG: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv\.intern\.b04fd23c98500190]](
|
|
28 // HIP-DAG: define weak_odr {{.*}}void @[[KTX:_Z2ktIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]](
|
|
29 // HIP-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]](
|
|
30 // HIP-DAG: @[[VM:_ZN12_GLOBAL__N_12vmE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global
|
|
31 // HIP-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE\.static\.b04fd23c98500190]] = addrspace(4) externally_initialized global
|
|
32 // HIP-DAG: @[[VT:_Z2vtIN12_GLOBAL__N_11XEE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global
|
|
33
|
|
34 // CUDA-DAG: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]](
|
|
35 // CUDA-DAG: define weak_odr {{.*}}void @[[KTX:_Z2ktIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]](
|
|
36 // CUDA-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]](
|
|
37 // CUDA-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE__static__b04fd23c98500190]] = addrspace(4) externally_initialized global
|
|
38 // CUDA-DAG: @[[VT:_Z2vtIN12_GLOBAL__N_11XEE__static__b04fd23c98500190]] = addrspace(1) externally_initialized global
|
|
39
|
|
40 // COMMON-DAG: @_ZN12_GLOBAL__N_12vdE = internal addrspace(1) global
|
|
41 // COMNEG-NOT: @{{.*}} = {{.*}} c"_ZN12_GLOBAL__N_12vdE{{.*}}\00"
|
|
42
|
|
43 // HIP-DAG: @llvm.compiler.used = {{.*}}@[[VM]]{{.*}}@[[VT]]{{.*}}@[[VC]]
|
|
44 // CUDA-DAG: @llvm.compiler.used = {{.*}}@[[VT]]{{.*}}@[[VC]]
|
|
45
|
|
46 // COMMON-DAG: @[[KERNSTR:.*]] = {{.*}} c"[[KERN]]\00"
|
|
47 // COMMON-DAG: @[[KTXSTR:.*]] = {{.*}} c"[[KTX]]\00"
|
|
48 // COMMON-DAG: @[[KTLSTR:.*]] = {{.*}} c"[[KTL]]\00"
|
|
49 // HIP-DAG: @[[VMSTR:.*]] = {{.*}} c"[[VM]]\00"
|
|
50 // COMMON-DAG: @[[VCSTR:.*]] = {{.*}} c"[[VC]]\00"
|
|
51 // COMMON-DAG: @[[VTSTR:.*]] = {{.*}} c"[[VT]]\00"
|
|
52
|
|
53 // COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[KERNSTR]]
|
|
54 // COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[KTXSTR]]
|
|
55 // COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[KTLSTR]]
|
|
56 // HIP-DAG: call void @__{{.*}}RegisterManagedVar({{.*}}@[[VMSTR]]
|
|
57 // COMMON-DAG: call void @__{{.*}}RegisterVar({{.*}}@[[VCSTR]]
|
|
58 // COMMON-DAG: call void @__{{.*}}RegisterVar({{.*}}@[[VTSTR]]
|
|
59
|
|
// Kernel template with an empty body. test() below launches it once with the
// anonymous-namespace object `x` and once with `lambda`; the KTX and KTL check
// patterns above match the two resulting instantiations.
60 template <typename T>
|
|
61 __global__ void kt(T x) {}
|
|
62
|
|
// Device-side variable template. test() takes the address of vt<X> in host
// code, and the VT check patterns above expect that instantiation to be
// emitted under a name carrying a `.static.` or `__static__` suffix.
63 template <typename T>
|
|
64 __device__ T vt;
|
|
65
|
|
// Declarations in this anonymous namespace have internal linkage in the
// source. The check patterns above verify which of them are given an
// externalized name and which one (vd) stays internal.
66 namespace {
|
|
67 struct X {};
|
|
68 X x;
|
|
69 auto lambda = [](){};
|
|
// vm is declared only when compiling as HIP; the CUDA check patterns have no
// corresponding VM entry.
70 #if __HIP__
|
|
71 __managed__ int vm = 1;
|
|
72 #endif
|
|
73 __constant__ int vc = 2;
|
|
74
|
|
75 // vd should not be externalized since it is used by device code only.
|
|
76 __device__ int vd = 3;
|
|
77 __global__ void kernel() { vd = 4; }
|
|
78 }
|
|
79
|
|
// Empty helper that accepts a pointer of any type. test() passes it the
// addresses of device variables so that host code references those variables.
80 template<typename T>
|
|
81 void getSymbol(T *x) {}
|
|
82
|
|
// Host function that launches each kernel and takes the address of each device
// variable that the check patterns above expect to be externalized.
83 void test() {
|
|
84 kernel<<<1, 1>>>();
|
|
85
|
|
86 kt<<<1, 1>>>(x);
|
|
87
|
|
88 kt<<<1, 1>>>(lambda);
|
|
89
|
|
90 // vm (HIP only), vc, and vt<X> should be externalized since they are
|
|
91 // used by host code.
|
|
92 #if __HIP__
|
|
93 getSymbol(&vm);
|
|
94 #endif
|
|
95 getSymbol(&vc);
|
|
96 getSymbol(&vt<X>);
|
|
97 }
|