150
|
1 // RUN: %clang_cc1 -fcuda-is-device \
|
|
2 // RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
|
173
|
3 // RUN: FileCheck -check-prefixes=NOFTZ,PTXNOFTZ %s
|
150
|
4
|
|
5 // RUN: %clang_cc1 -fcuda-is-device -fdenormal-fp-math-f32=ieee \
|
|
6 // RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
|
173
|
7 // RUN: FileCheck -check-prefixes=NOFTZ,PTXNOFTZ %s
|
150
|
8
|
|
9 // RUN: %clang_cc1 -fcuda-is-device -fdenormal-fp-math-f32=preserve-sign \
|
|
10 // RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
|
173
|
11 // RUN: FileCheck -check-prefixes=FTZ,PTXFTZ %s
|
150
|
12
|
|
13 // RUN: %clang_cc1 -fcuda-is-device -x hip \
|
|
14 // RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm -o - %s | \
|
173
|
15 // RUN: FileCheck -check-prefix=NOFTZ %s
|
150
|
16
|
|
17 // RUN: %clang_cc1 -fcuda-is-device -x hip \
|
|
18 // RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -fdenormal-fp-math-f32=ieee -emit-llvm -o - %s | \
|
173
|
19 // RUN: FileCheck -check-prefix=NOFTZ %s
|
150
|
20
|
|
21 // RUN: %clang_cc1 -fcuda-is-device -x hip -fdenormal-fp-math-f32=preserve-sign \
|
|
22 // RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm -o - %s | \
|
173
|
23 // RUN: FileCheck -check-prefix=FTZ %s
|
150
|
24
|
|
25 #include "Inputs/cuda.h"
|
|
26
|
|
27 // Checks that device functions get emitted with the "denormal-fp-math-f32"
|
|
28 // attribute set when we compile CUDA device code with
|
|
29 // -fdenormal-fp-math-f32. Further, check that we reflect the presence or
|
207
|
30 // absence of -fgpu-flush-denormals-to-zero in a module flag.
|
150
|
31
|
173
|
32 // AMDGCN targets always have f64/f16 denormals enabled.
|
|
33 //
|
|
34 // AMDGCN targets without fast FMAF (e.g. gfx803) always have f32 denormal
|
|
35 // flushing by default.
|
|
36 //
|
|
37 // AMDGCN targets with fast FMAF (e.g. gfx900) have ieee denormals by
|
|
38 // default and preserve-sign when compiled with the option
|
207
|
39 // -fgpu-flush-denormals-to-zero.
|
150
|
40
|
|
// foo is an empty device function with C linkage. It exists only so that the
// compiler emits a function definition carrying attribute group #0, which the
// FTZ / NOFTZ directives further down inspect.
//
// NOTE(review): none of the run lines in this file enables the default CHECK
// prefix (they select NOFTZ, PTXNOFTZ, FTZ or PTXFTZ explicitly), so the
// CHECK-LABEL directive just below appears to be never evaluated. TODO confirm,
// then either add CHECK to the check-prefixes lists or drop the directive.
41 // CHECK-LABEL: define void @foo() #0
|
|
42 extern "C" __device__ void foo() {}
|
|
43
|
|
44 // FTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
|
173
|
45 // NOFTZ-NOT: "denormal-fp-math-f32"
|
150
|
46
|
173
|
47 // PTXFTZ:!llvm.module.flags = !{{{.*}}[[MODFLAG:![0-9]+]]}
|
|
48 // PTXFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 1}
|
150
|
49
|
173
|
50 // PTXNOFTZ:!llvm.module.flags = !{{{.*}}[[MODFLAG:![0-9]+]]}
|
|
51 // PTXNOFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 0}
|