150
|
1 // Tests the bindings generated for a CUDA offloading target for different
|
|
2 // combinations of:
|
|
3 // - Number of gpu architectures;
|
|
4 // - Host/device-only compilation;
|
|
5 // - User-requested final phase - binary or assembly.
|
|
6 // It parallels the cuda-phases.cu test, but verifies whether the output file is temporary or not.
|
|
7
|
|
8 // It's hard to check whether a file name is temporary in a portable
|
|
9 // way. Instead we check whether we've generated a permanent name on
|
|
10 // the device side, which appends a '-device-cuda-<triple>' suffix.
|
|
11
|
|
12 // REQUIRES: powerpc-registered-target
|
|
13 // REQUIRES: nvptx-registered-target
|
|
14
|
|
15 //
|
|
16 // Test single gpu architecture with complete compilation.
|
|
17 // No intermediary device files should have "-device-cuda..." in the name.
|
|
18 //
|
|
19 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s 2>&1 \
|
|
20 // RUN: | FileCheck -check-prefix=BIN %s
|
|
21 // BIN: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
|
|
22 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
|
|
23 // BIN: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output:
|
|
24 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
|
|
25 // BIN: # "nvptx64-nvidia-cuda" - "NVPTX::Linker",{{.*}} output:
|
|
26 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
|
|
27 // BIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
|
|
28 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
|
|
29 // BIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
|
|
30
|
|
31 //
|
|
32 // Test single gpu architecture up to the assemble phase.
|
|
33 //
|
|
34 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s -S 2>&1 \
|
|
35 // RUN: | FileCheck -check-prefix=ASM %s
|
|
36 // ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
|
|
37 // ASM-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
|
|
38
|
|
39 //
|
|
40 // Test two gpu architectures with complete compilation.
|
|
41 //
|
236
|
42 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
|
|
43 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
|
|
44 // RUN: | FileCheck -check-prefixes=BIN2,AOUT %s
|
|
45 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
|
|
46 // RUN: --offload-arch=sm_30,sm_35 %s 2>&1 \
|
|
47 // RUN: | FileCheck -check-prefixes=BIN2,AOUT %s
|
|
48 // .. same, but with explicitly specified output.
|
|
49 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
|
|
50 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -o %t/out 2>&1 \
|
|
51 // RUN: | FileCheck -check-prefixes=BIN2,TOUT %s
|
|
52 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
|
|
53 // RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
|
|
54 // RUN: | FileCheck -check-prefixes=BIN2,TOUT %s
|
150
|
55 // BIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
|
|
56 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
|
|
57 // BIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output:
|
|
58 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
|
|
59 // BIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
|
|
60 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
|
|
61 // BIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output:
|
|
62 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
|
|
63 // BIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Linker",{{.*}} output:
|
|
64 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
|
|
65 // BIN2: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
|
|
66 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
|
236
|
67 // AOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
|
|
68 // TOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "{{.*}}/out"
|
|
69
|
|
70 // .. same, but with -fsyntax-only
|
|
71 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
|
|
72 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
|
|
73 // RUN: | FileCheck -check-prefix=SYN %s
|
|
74 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
|
|
75 // RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
|
|
76 // RUN: | FileCheck -check-prefix=SYN %s
|
|
77 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
|
|
78 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
|
|
79 // RUN: | FileCheck -check-prefix=SYN %s
|
|
80 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
|
|
81 // RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
|
|
82 // RUN: | FileCheck -check-prefix=SYN %s
|
|
83 // SYN-NOT: inputs:
|
|
84 // SYN: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
|
|
85 // SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
|
|
86 // SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
|
|
87 // SYN-NOT: inputs
|
|
88
|
|
89 // .. and with --offload-new-driver
|
|
90 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
|
|
91 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 --offload-new-driver %s 2>&1 \
|
|
92 // RUN: | FileCheck -check-prefix=NDSYN %s
|
|
93 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
|
|
94 // RUN: --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \
|
|
95 // RUN: | FileCheck -check-prefix=NDSYN %s
|
|
96 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
|
|
97 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --offload-new-driver 2>&1 \
|
|
98 // RUN: | FileCheck -check-prefix=NDSYN %s
|
|
99 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
|
|
100 // RUN: --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \
|
|
101 // RUN: | FileCheck -check-prefix=NDSYN %s
|
|
102 // NDSYN-NOT: inputs:
|
|
103 // NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
|
|
104 // NDSYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
|
|
105 // NDSYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
|
|
106 // NDSYN-NOT: inputs:
|
|
107
|
150
|
108
|
|
109 //
|
|
110 // Test two gpu architectures up to the assemble phase.
|
|
111 //
|
|
112 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
|
|
113 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
|
|
114 // RUN: | FileCheck -check-prefix=ASM2 %s
|
|
115 // ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
|
|
116 // ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s"
|
|
117 // ASM2-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
|
|
118
|
|
119 //
|
|
120 // Test one or more gpu architectures with complete compilation in host-only
|
|
121 // compilation mode.
|
|
122 //
|
|
123 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
|
|
124 // RUN: --cuda-gpu-arch=sm_30 %s --cuda-host-only 2>&1 \
|
|
125 // RUN: | FileCheck -check-prefix=HBIN %s
|
|
126 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
|
|
127 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only 2>&1 \
|
|
128 // RUN: | FileCheck -check-prefix=HBIN %s
|
|
129 // HBIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
|
|
130 // HBIN-NOT: cuda-bindings-device-cuda-nvptx64
|
|
131 // HBIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
|
|
132
|
|
133 //
|
|
134 // Test one or more gpu architectures up to the assemble phase in host-only
|
|
135 // compilation mode.
|
|
136 //
|
|
137 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
|
|
138 // RUN: --cuda-gpu-arch=sm_30 %s --cuda-host-only -S 2>&1 \
|
|
139 // RUN: | FileCheck -check-prefix=HASM %s
|
|
140 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
|
|
141 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only -S 2>&1 \
|
|
142 // RUN: | FileCheck -check-prefix=HASM %s
|
|
143 // HASM: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
|
|
144
|
|
145 //
|
|
146 // Test single gpu architecture with complete compilation in device-only
|
|
147 // compilation mode.
|
|
148 //
|
|
149 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
|
|
150 // RUN: --cuda-gpu-arch=sm_30 %s --cuda-device-only 2>&1 \
|
|
151 // RUN: | FileCheck -check-prefix=DBIN %s
|
|
152 // DBIN: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
|
|
153 // DBIN-NOT: cuda-bindings-device-cuda-nvptx64
|
|
154 // DBIN: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.o"
|
|
155
|
|
156 //
|
|
157 // Test single gpu architecture up to the assemble phase in device-only
|
|
158 // compilation mode.
|
|
159 //
|
|
160 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
|
|
161 // RUN: --cuda-gpu-arch=sm_30 %s --cuda-device-only -S 2>&1 \
|
|
162 // RUN: | FileCheck -check-prefix=DASM %s
|
|
163 // DASM: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
|
|
164
|
|
165 //
|
|
166 // Test two gpu architectures with complete compilation in device-only
|
|
167 // compilation mode.
|
|
168 //
|
|
169 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
|
|
170 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
|
|
171 // RUN: | FileCheck -check-prefix=DBIN2 %s
|
|
172 // DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
|
|
173 // DBIN2-NOT: cuda-bindings-device-cuda-nvptx64
|
|
174 // DBIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.o"
|
|
175 // DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
|
|
176 // DBIN2-NOT: cuda-bindings-device-cuda-nvptx64
|
|
177 // DBIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.o"
|
|
178
|
|
179 //
|
|
180 // Test two gpu architectures up to the assemble phase in device-only
|
|
181 // compilation mode.
|
|
182 //
|
|
183 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
|
|
184 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S 2>&1 \
|
|
185 // RUN: | FileCheck -check-prefix=DASM2 %s
|
|
186 // DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
|
|
187 // DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s"
|
236
|
188
|
|
189 //
|
|
190 // Ensure we output the user's specified name in device-only mode.
|
|
191 //
|
|
192 // RUN: %clang -target powerpc64le-ibm-linux-gnu -### \
|
252
|
193 // RUN: --cuda-gpu-arch=sm_52 --cuda-device-only -c -o foo.o --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
|
236
|
194 // RUN: | FileCheck -check-prefix=D_ONLY %s
|
|
195 // RUN: %clang -target powerpc64le-ibm-linux-gnu -### --offload-new-driver \
|
252
|
196 // RUN: --cuda-gpu-arch=sm_52 --cuda-device-only -c -o foo.o --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
|
236
|
197 // RUN: | FileCheck -check-prefix=D_ONLY %s
|
|
198 // D_ONLY: "foo.o"
|
|
199
|
|
200 //
|
|
201 // Check to make sure we can generate multiple outputs for device-only
|
|
202 // compilation, and that we fail when '-o' is specified.
|
|
203 //
|
|
204 // RUN: %clang -### -target powerpc64le-ibm-linux-gnu --offload-new-driver -ccc-print-bindings \
|
|
205 // RUN: --offload-arch=sm_70 --offload-arch=sm_52 --offload-device-only -c %s 2>&1 \
|
|
206 // RUN: | FileCheck -check-prefix=MULTI-D-ONLY %s
|
|
207 // MULTI-D-ONLY: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_70:.+]]"
|
|
208 // MULTI-D-ONLY-NEXT: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_70]]"], output: "[[CUBIN_70:.+]]"
|
|
209 // MULTI-D-ONLY-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_52:.+]]"
|
|
210 // MULTI-D-ONLY-NEXT: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_52]]"], output: "[[CUBIN_52:.+]]"
|
|
211 //
|
252
|
212 // RUN: not %clang -### --target=powerpc64le-ibm-linux-gnu --offload-new-driver -ccc-print-bindings \
|
236
|
213 // RUN: --offload-arch=sm_70 --offload-arch=sm_52 --offload-device-only -c -o %t %s 2>&1 \
|
|
214 // RUN: | FileCheck -check-prefix=MULTI-D-ONLY-O %s
|
|
215 // MULTI-D-ONLY-O: error: cannot specify -o when generating multiple output files
|
|
216
|
|
217 //
|
|
218 // Check to ensure that we can use '-fsyntax-only' for CUDA output with the new
|
|
219 // driver.
|
|
220 //
|
|
221 // RUN: %clang -### -target powerpc64le-ibm-linux-gnu --offload-new-driver \
|
252
|
222 // RUN: -fsyntax-only --offload-arch=sm_70 --offload-arch=sm_52 -c --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda %s 2>&1 \
|
236
|
223 // RUN: | FileCheck -check-prefix=SYNTAX-ONLY %s
|
|
224 // SYNTAX-ONLY: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-fsyntax-only"
|
|
225 // SYNTAX-ONLY: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-fsyntax-only"
|
|
226 // SYNTAX-ONLY: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu"{{.*}}"-fsyntax-only"
|
|
227
|
|
228 //
|
|
229 // Check to ensure that we can use '-save-temps' when operating in RDC-mode.
|
|
230 //
|
|
231 // RUN: %clang -### -target powerpc64le-ibm-linux-gnu -save-temps --offload-new-driver \
|
252
|
232 // RUN: -fgpu-rdc --offload-arch=sm_70 --offload-arch=sm_52 -c --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda %s 2>&1 \
|
236
|
233 // RUN: | FileCheck -check-prefix=SAVE-TEMPS %s
|
|
234 // SAVE-TEMPS: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_52"
|
|
235 // SAVE-TEMPS: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
|
|
236 // SAVE-TEMPS: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu"
|
|
237
|
|
238 //
|
|
239 // Check to ensure that we cannot use '-foffload-lto' when not operating in RDC-mode.
|
|
240 //
|
252
|
241 // RUN: not %clang -### --target=powerpc64le-ibm-linux-gnu -fno-gpu-rdc --offload-new-driver \
|
236
|
242 // RUN: -foffload-lto --offload-arch=sm_70 --offload-arch=sm_52 -c %s 2>&1 \
|
|
243 // RUN: | FileCheck -check-prefix=LTO-NO-RDC %s
|
|
244 // LTO-NO-RDC: error: unsupported option '-foffload-lto' for language mode '-fno-gpu-rdc'
|