207
|
// RUN: %clang_cc1 \
// RUN: -triple aarch64-arm-none-eabi -target-feature +neon -target-feature +bf16 \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg -instcombine \
// RUN: | FileCheck --check-prefixes=CHECK,CHECK-A64 %s
|
|
// RUN: %clang_cc1 \
// RUN: -triple armv8.6a-arm-none-eabi -target-feature +neon \
// RUN: -target-feature +bf16 -mfloat-abi hard \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg -instcombine \
// RUN: | FileCheck --check-prefixes=CHECK,CHECK-A32-HARDFP %s
|
|
// RUN: %clang_cc1 \
// RUN: -triple armv8.6a-arm-none-eabi -target-feature +neon \
// RUN: -target-feature +bf16 -mfloat-abi softfp \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg -instcombine \
// RUN: | FileCheck --check-prefixes=CHECK,CHECK-A32-SOFTFP %s
|
|
18
|
|
19 #include <arm_neon.h>
|
|
20
|
|
21 // CHECK-LABEL: test_vcvt_f32_bf16
|
|
22 // CHECK: %[[EXT:.*]] = zext <4 x i16> %{{.*}} to <4 x i32>
|
|
23 // CHECK: shl nuw <4 x i32> %[[EXT]], <i32 16, i32 16, i32 16, i32 16>
|
|
24 float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) {
|
|
25 return vcvt_f32_bf16(a);
|
|
26 }
|
|
27
|
|
28 // CHECK-LABEL: test_vcvtq_low_f32_bf16
|
|
29 // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
30 // CHECK: %[[EXT:.*]] = zext <4 x i16> %{{.*}} to <4 x i32>
|
|
31 // CHECK: shl nuw <4 x i32> %[[EXT]], <i32 16, i32 16, i32 16, i32 16>
|
|
32 float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) {
|
|
33 return vcvtq_low_f32_bf16(a);
|
|
34 }
|
|
35
|
|
36 // CHECK-LABEL: test_vcvtq_high_f32_bf16
|
|
37 // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
|
38 // CHECK: %[[EXT:.*]] = zext <4 x i16> %{{.*}} to <4 x i32>
|
|
39 // CHECK: shl nuw <4 x i32> %[[EXT]], <i32 16, i32 16, i32 16, i32 16>
|
|
40 float32x4_t test_vcvtq_high_f32_bf16(bfloat16x8_t a) {
|
|
41 return vcvtq_high_f32_bf16(a);
|
|
42 }
|
|
43
|
|
44 // CHECK-LABEL: test_vcvt_bf16_f32
|
|
45 // CHECK-A64: %[[CVT:.*]] = call <8 x bfloat> @llvm.aarch64.neon.bfcvtn(<4 x float> %a)
|
|
46 // CHECK-A64: shufflevector <8 x bfloat> %[[CVT]], <8 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
47 // CHECK-A32-HARDFP: call <4 x bfloat> @llvm.arm.neon.vcvtfp2bf.v4bf16(<4 x float> %a)
|
|
48 // CHECK-A32-SOFTFP: call <4 x i16> @llvm.arm.neon.vcvtfp2bf.v4i16(<4 x float> %a)
|
|
49 bfloat16x4_t test_vcvt_bf16_f32(float32x4_t a) {
|
|
50 return vcvt_bf16_f32(a);
|
|
51 }
|
|
52
|
|
53 // CHECK-LABEL: test_vcvtq_low_bf16_f32
|
|
54 // CHECK-A64: call <8 x bfloat> @llvm.aarch64.neon.bfcvtn(<4 x float> %a)
|
|
55 // CHECK-A32-HARDFP: %[[CVT:.*]] = call <4 x bfloat> @llvm.arm.neon.vcvtfp2bf.v4bf16
|
|
56 // CHECK-A32-HARDFP: shufflevector <4 x bfloat> zeroinitializer, <4 x bfloat> %[[CVT]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
57 // CHECK-A32-SOFTFP: call <4 x i16> @llvm.arm.neon.vcvtfp2bf.v4i16
|
|
58 // CHECK-A32-SOFTFP: shufflevector <4 x bfloat> zeroinitializer, <4 x bfloat> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
59 bfloat16x8_t test_vcvtq_low_bf16_f32(float32x4_t a) {
|
|
60 return vcvtq_low_bf16_f32(a);
|
|
61 }
|
|
62
|
|
63 // CHECK-LABEL: test_vcvtq_high_bf16_f32
|
|
64 // CHECK-A64: call <8 x bfloat> @llvm.aarch64.neon.bfcvtn2(<8 x bfloat> %inactive, <4 x float> %a)
|
|
65 // CHECK-A32-HARDFP: %[[CVT:.*]] = call <4 x bfloat> @llvm.arm.neon.vcvtfp2bf.v4bf16(<4 x float> %a)
|
|
66 // CHECK-A32-HARDFP: %[[INACT:.*]] = shufflevector <8 x bfloat> %inactive, <8 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
67 // CHECK-A32-HARDFP: shufflevector <4 x bfloat> %[[CVT]], <4 x bfloat> %[[INACT]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
68 // CHECK-A32-SOFTFP: call <4 x i16> @llvm.arm.neon.vcvtfp2bf.v4i16(<4 x float> %a)
|
|
69 // CHECK-A32-SOFTFP: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
70 // CHECK-A32-SOFTFP: shufflevector <4 x bfloat> %{{.*}}, <4 x bfloat> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
71 bfloat16x8_t test_vcvtq_high_bf16_f32(bfloat16x8_t inactive, float32x4_t a) {
|
|
72 return vcvtq_high_bf16_f32(inactive, a);
|
|
73 }
|
|
74
|
|
75 // CHECK-LABEL: test_vcvth_bf16_f32
|
|
76 // CHECK-A64: call bfloat @llvm.aarch64.neon.bfcvt(float %a)
|
|
77 // CHECK-A32-HARDFP: call bfloat @llvm.arm.neon.vcvtbfp2bf(float %a)
|
|
78 // CHECK-A32-SOFTFP: call bfloat @llvm.arm.neon.vcvtbfp2bf(float %a)
|
|
79 bfloat16_t test_vcvth_bf16_f32(float32_t a) {
|
|
80 return vcvth_bf16_f32(a);
|
|
81 }
|
|
82
|
|
83 // CHECK-LABEL: test_vcvtah_f32_bf16
|
|
84 // CHECK: shl i32 %{{.*}}, 16
|
|
85 float32_t test_vcvtah_f32_bf16(bfloat16_t a) {
|
|
86 return vcvtah_f32_bf16(a);
|
|
87 }
|
|
88
|