Mercurial > hg > CbC > CbC_llvm
comparison clang/test/Driver/cuda-phases.cu @ 150:1d019706d866
LLVM10
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 15:10:13 +0900 |
parents | |
children | 2e18cbf3894f |
comparison
equal
deleted
inserted
replaced
147:c2174574ed3a | 150:1d019706d866 |
---|---|
1 // Tests the phases generated for a CUDA offloading target for different | |
2 // combinations of: | |
3 // - Number of gpu architectures; | |
4 // - Host/device-only compilation; | |
5 // - User-requested final phase - binary or assembly. | |
6 | |
7 // REQUIRES: clang-driver | |
8 // REQUIRES: powerpc-registered-target | |
9 // REQUIRES: nvptx-registered-target | |
10 // REQUIRES: amdgpu-registered-target | |
11 // | |
12 // Test single gpu architecture with complete compilation. | |
13 // | |
14 // Test CUDA NVPTX phases. | |
15 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
16 // RUN: --cuda-gpu-arch=sm_30 %s 2>&1 \ | |
17 // RUN: | FileCheck -check-prefixes=BIN,BIN_NV %s | |
18 // | |
19 // Test HIP AMDGPU -fgpu-rdc phases. | |
20 // RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
21 // RUN: --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \ | |
22 // RUN: | FileCheck -check-prefixes=BIN,BIN_AMD,BIN_AMD_RDC %s | |
23 // | |
24 // Test HIP AMDGPU -fno-gpu-rdc phases (default). | |
25 // RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
26 // RUN: --cuda-gpu-arch=gfx803 %s 2>&1 \ | |
27 // RUN: | FileCheck -check-prefixes=BIN,BIN_AMD,BIN_AMD_NRDC %s | |
28 // | |
29 // BIN_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) | |
30 // BIN_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]]) | |
31 // BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) | |
32 // BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) | |
33 // BIN_NV-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH:sm_30]]) | |
34 // BIN_AMD-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH:gfx803]]) | |
35 // BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) | |
36 // BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]]) | |
37 // BIN_NV-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]]) | |
38 // BIN_NV-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]]) | |
39 // BIN_NV-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P7]]}, object | |
40 // BIN_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH]])" {[[P6]]}, assembler | |
41 // BIN_NV-DAG: [[P10:[0-9]+]]: linker, {[[P8]], [[P9]]}, cuda-fatbin, (device-[[T]]) | |
42 // BIN_NV-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P10]]}, ir | |
43 // BIN_NV-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]]) | |
44 // BIN_AMD_RDC-DAG: [[P12:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) | |
45 // BIN_AMD_NRDC-DAG: [[P6:[0-9]+]]: linker, {[[P5]]}, image, (device-hip, [[ARCH]]) | |
46 // BIN_AMD_NRDC-DAG: [[P7:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH]])" {[[P6]]}, image | |
47 // BIN_AMD_NRDC-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, hip-fatbin, (device-hip) | |
48 // BIN_AMD_NRDC-DAG: [[P11:[0-9]+]]: offload, "host-hip (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-hip (amdgcn-amd-amdhsa)" {[[P8]]}, ir | |
49 // BIN_AMD_NRDC-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]]) | |
50 // BIN-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]]) | |
51 // BIN-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]]) | |
52 // BIN_AMD_RDC-DAG: [[P15:[0-9]+]]: linker, {[[P5]]}, image, (device-[[T]], [[ARCH]]) | |
53 // BIN_AMD_RDC-DAG: [[P16:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P14]]}, | |
54 // BIN_AMD_RDC-DAG-SAME: "device-[[T]] ([[TRIPLE:amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P15]]}, object | |
55 | |
56 // | |
57 // Test single gpu architecture up to the assemble phase. | |
58 // | |
59 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
60 // RUN: --cuda-gpu-arch=sm_30 %s -S 2>&1 \ | |
61 // RUN: | FileCheck -check-prefixes=ASM,ASM_NV %s | |
62 // RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
63 // RUN: --cuda-gpu-arch=gfx803 -fgpu-rdc %s -S 2>&1 \ | |
64 // RUN: | FileCheck -check-prefixes=ASM,ASM_AMD %s | |
65 // RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
66 // RUN: --cuda-gpu-arch=gfx803 -fcuda-rdc %s -S 2>&1 \ | |
67 // RUN: | FileCheck -check-prefixes=ASM,ASM_AMD %s | |
68 // ASM_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) | |
69 // ASM_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) | |
70 // ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) | |
71 // ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) | |
72 // ASM_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) | |
73 // ASM_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler | |
74 // ASM-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (host-[[T]]) | |
75 // ASM-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (host-[[T]]) | |
76 // ASM-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (host-[[T]]) | |
77 // ASM-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (host-[[T]]) | |
78 | |
79 // | |
80 // Test two gpu architectures with complete compilation. | |
81 // | |
82 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
83 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \ | |
84 // RUN: | FileCheck -check-prefixes=BIN2,BIN2_NV %s | |
85 // RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
86 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %s 2>&1 \ | |
87 // RUN: | FileCheck -check-prefixes=BIN2,BIN2_AMD %s | |
88 // BIN2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) | |
89 // BIN2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]]) | |
90 // BIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) | |
91 // BIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) | |
92 // BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH1:sm_30|gfx803]]) | |
93 // BIN2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]]) | |
94 // BIN2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]]) | |
95 // BIN2_NV-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]]) | |
96 // BIN2_NV-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]]) | |
97 // BIN2_NV-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH1]])" {[[P7]]}, object | |
98 // BIN2_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH1]])" {[[P6]]}, assembler | |
99 // BIN2-DAG: [[P10:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35|gfx900]]) | |
100 // BIN2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) | |
101 // BIN2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]]) | |
102 // BIN2_NV-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]]) | |
103 // BIN2_NV-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]]) | |
104 // BIN2_NV-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P14]]}, object | |
105 // BIN2_NV-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P13]]}, assembler | |
106 // BIN2_NV-DAG: [[P17:[0-9]+]]: linker, {[[P8]], [[P9]], [[P15]], [[P16]]}, cuda-fatbin, (device-[[T]]) | |
107 // BIN2_NV-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P17]]}, ir | |
108 // BIN2_NV-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]]) | |
109 // BIN2_AMD-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) | |
110 // BIN2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]]) | |
111 // BIN2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]]) | |
112 // BIN2_AMD-DAG: [[P22:[0-9]+]]: linker, {[[P5]]}, image, (device-[[T]], [[ARCH1]]) | |
113 // BIN2_AMD-DAG: [[P23:[0-9]+]]: linker, {[[P12]]}, image, (device-[[T]], [[ARCH2]]) | |
114 // BIN2_AMD-DAG: [[P24:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P21]]}, | |
115 // BIN2_AMD-DAG-SAME: "device-[[T]] ([[TRIPLE:amdgcn-amd-amdhsa]]:[[ARCH1]])" {[[P22]]}, | |
116 // BIN2_AMD-DAG-SAME: "device-[[T]] ([[TRIPLE:amdgcn-amd-amdhsa]]:[[ARCH2]])" {[[P23]]}, object | |
117 | |
118 // | |
119 // Test two gpu architectures up to the assemble phase. | |
120 // | |
121 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
122 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \ | |
123 // RUN: | FileCheck -check-prefixes=ASM2,ASM2_NV %s | |
124 // RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
125 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %s -S 2>&1 \ | |
126 // RUN: | FileCheck -check-prefixes=ASM2,ASM2_AMD %s | |
127 // ASM2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH1:sm_30]]) | |
128 // ASM2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH1:gfx803]]) | |
129 // ASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]]) | |
130 // ASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH1]]) | |
131 // ASM2_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH1]]) | |
132 // ASM2_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda|amdgcn-amd-amdhsa]]:[[ARCH1]])" {[[P3]]}, assembler | |
133 // ASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35|gfx900]]) | |
134 // ASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) | |
135 // ASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]]) | |
136 // ASM2_NV-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]]) | |
137 // ASM2_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler | |
138 // ASM2-DAG: [[P10:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (host-[[T]]) | |
139 // ASM2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (host-[[T]]) | |
140 // ASM2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (host-[[T]]) | |
141 // ASM2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (host-[[T]]) | |
142 | |
143 // | |
144 // Test single gpu architecture with complete compilation in host-only | |
145 // compilation mode. | |
146 // | |
147 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
148 // RUN: --cuda-gpu-arch=sm_30 %s --cuda-host-only 2>&1 \ | |
149 // RUN: | FileCheck -check-prefixes=HBIN,HBIN_NV %s | |
150 // RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
151 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-host-only 2>&1 \ | |
152 // RUN: | FileCheck -check-prefixes=HBIN,HBIN_AMD %s | |
153 // HBIN_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) | |
154 // HBIN_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]]) | |
155 // HBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) | |
156 // HBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) | |
157 // HBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) | |
158 // HBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]]) | |
159 // HBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]]) | |
160 // HBIN-NOT: device | |
161 // | |
162 // Test single gpu architecture up to the assemble phase in host-only | |
163 // compilation mode. | |
164 // | |
165 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
166 // RUN: --cuda-gpu-arch=sm_30 %s --cuda-host-only -S 2>&1 \ | |
167 // RUN: | FileCheck -check-prefixes=HASM,HASM_NV %s | |
168 // RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
169 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-host-only -S 2>&1 \ | |
170 // RUN: | FileCheck -check-prefixes=HASM,HASM_AMD %s | |
171 // HASM_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) | |
172 // HASM_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]]) | |
173 // HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) | |
174 // HASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) | |
175 // HASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) | |
176 // HASM-NOT: device | |
177 | |
178 // | |
179 // Test two gpu architectures with complete compilation in host-only | |
180 // compilation mode. | |
181 // | |
182 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
183 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only 2>&1 \ | |
184 // RUN: | FileCheck -check-prefixes=HBIN2,HBIN2_NV %s | |
185 // RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
186 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only 2>&1 \ | |
187 // RUN: | FileCheck -check-prefixes=HBIN2,HBIN2_AMD %s | |
188 // HBIN2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) | |
189 // HBIN2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]]) | |
190 // HBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) | |
191 // HBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) | |
192 // HBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) | |
193 // HBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]]) | |
194 // HBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]]) | |
195 // HBIN2-NOT: device | |
196 | |
197 // | |
198 // Test two gpu architectures up to the assemble phase in host-only | |
199 // compilation mode. | |
200 // | |
201 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
202 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only -S \ | |
203 // RUN: 2>&1 | FileCheck -check-prefixes=HASM2,HASM2_NV %s | |
204 // RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
205 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only -S \ | |
206 // RUN: 2>&1 | FileCheck -check-prefixes=HASM2,HASM2_AMD %s | |
207 // HASM2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) | |
208 // HASM2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]]) | |
209 // HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) | |
210 // HASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) | |
211 // HASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) | |
212 // HASM2-NOT: device | |
213 | |
214 // | |
215 // Test single gpu architecture with complete compilation in device-only | |
216 // compilation mode. | |
217 // | |
218 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
219 // RUN: --cuda-gpu-arch=sm_30 %s --cuda-device-only 2>&1 \ | |
220 // RUN: | FileCheck -check-prefixes=DBIN,DBIN_NV %s | |
221 // RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
222 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only 2>&1 \ | |
223 // RUN: | FileCheck -check-prefixes=DBIN,DBIN_AMD %s | |
224 // DBIN_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) | |
225 // DBIN_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) | |
226 // DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) | |
227 // DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) | |
228 // DBIN_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) | |
229 // DBIN_NV-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]]) | |
230 // DBIN_NV-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (nvptx64-nvidia-cuda:[[ARCH]])" {[[P4]]}, object | |
231 // DBIN-NOT: host | |
232 // | |
233 // Test single gpu architecture up to the assemble phase in device-only | |
234 // compilation mode. | |
235 // | |
236 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
237 // RUN: --cuda-gpu-arch=sm_30 %s --cuda-device-only -S 2>&1 \ | |
238 // RUN: | FileCheck -check-prefixes=DASM,DASM_NV %s | |
239 // RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
240 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -S 2>&1 \ | |
241 // RUN: | FileCheck -check-prefixes=DASM,DASM_AMD %s | |
242 // DASM_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) | |
243 // DASM_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) | |
244 // DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) | |
245 // DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) | |
246 // DASM_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) | |
247 // DASM_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler | |
248 // DASM-NOT: host | |
249 | |
250 // | |
251 // Test two gpu architectures with complete compilation in device-only | |
252 // compilation mode. | |
253 // | |
254 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
255 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \ | |
256 // RUN: | FileCheck -check-prefixes=DBIN2,DBIN2_NV %s | |
257 // RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
258 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only \ | |
259 // RUN: 2>&1 | FileCheck -check-prefixes=DBIN2,DBIN2_AMD %s | |
260 // DBIN2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) | |
261 // DBIN2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) | |
262 // DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) | |
263 // DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) | |
264 // DBIN2_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) | |
265 // DBIN2_NV-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]]) | |
266 // DBIN2_NV-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P4]]}, object | |
267 // DBIN2-DAG: [[P6:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35|gfx900]]) | |
268 // DBIN2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) | |
269 // DBIN2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-[[T]], [[ARCH2]]) | |
270 // DBIN2_NV-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-[[T]], [[ARCH2]]) | |
271 // DBIN2_NV-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], [[ARCH2]]) | |
272 // DBIN2_NV-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P10]]}, object | |
273 // DBIN2-NOT: host | |
274 // | |
275 // Test two gpu architectures up to the assemble phase in device-only | |
276 // compilation mode. | |
277 // | |
278 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ | |
279 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S \ | |
280 // RUN: 2>&1 | FileCheck -check-prefixes=DASM2,DASM2_NV %s | |
281 // RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu \ | |
282 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ | |
283 // RUN: --cuda-device-only -S 2>&1 \ | |
284 // RUN: | FileCheck -check-prefixes=DASM2,DASM2_AMD %s | |
285 // DASM2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) | |
286 // DASM2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) | |
287 // DASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) | |
288 // DASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) | |
289 // DASM2_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) | |
290 // DASM2_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler | |
291 // DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35|gfx900]]) | |
292 // DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) | |
293 // DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]]) | |
294 // DASM2_NV-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]]) | |
295 // DASM2_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler | |
296 // DASM2-NOT: host |