Mercurial > hg > CbC > CbC_llvm
comparison clang/test/CodeGen/arm-bf16-convert-intrinsics.c @ 223:5f17cb93ff66 llvm-original
LLVM13 (2021/7/18)
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 18 Jul 2021 22:43:00 +0900 |
parents | 79ff65ed7e25 |
children | c4bab56944e8 |
comparison
equal
deleted
inserted
replaced
222:81f6424ef0e3 | 223:5f17cb93ff66 |
---|---|
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py | |
1 // RUN: %clang_cc1 \ | 2 // RUN: %clang_cc1 \ |
2 // RUN: -triple aarch64-arm-none-eabi -target-feature +neon -target-feature +bf16 \ | 3 // RUN: -triple aarch64-arm-none-eabi -target-feature +neon -target-feature +bf16 \ |
3 // RUN: -disable-O0-optnone -emit-llvm -o - %s \ | 4 // RUN: -disable-O0-optnone -emit-llvm -fno-legacy-pass-manager -o - %s \ |
4 // RUN: | opt -S -mem2reg -instcombine \ | 5 // RUN: | opt -S -mem2reg \ |
5 // RUN: | FileCheck --check-prefixes=CHECK,CHECK-A64 %s | 6 // RUN: | FileCheck --check-prefixes=CHECK,CHECK-A64 %s |
6 // RUN: %clang_cc1 \ | 7 // RUN: %clang_cc1 \ |
7 // RUN: -triple armv8.6a-arm-none-eabi -target-feature +neon \ | 8 // RUN: -triple armv8.6a-arm-none-eabi -target-feature +neon \ |
8 // RUN: -target-feature +bf16 -mfloat-abi hard \ | 9 // RUN: -target-feature +bf16 -mfloat-abi hard \ |
9 // RUN: -disable-O0-optnone -emit-llvm -o - %s \ | 10 // RUN: -disable-O0-optnone -emit-llvm -fno-legacy-pass-manager -o - %s \ |
10 // RUN: | opt -S -mem2reg -instcombine \ | 11 // RUN: | opt -S -mem2reg \ |
11 // RUN: | FileCheck --check-prefixes=CHECK,CHECK-A32-HARDFP %s | 12 // RUN: | FileCheck --check-prefixes=CHECK,CHECK-A32-HARDFP %s |
12 // RUN: %clang_cc1 \ | 13 // RUN: %clang_cc1 \ |
13 // RUN: -triple armv8.6a-arm-none-eabi -target-feature +neon \ | 14 // RUN: -triple armv8.6a-arm-none-eabi -target-feature +neon \ |
14 // RUN: -target-feature +bf16 -mfloat-abi softfp \ | 15 // RUN: -target-feature +bf16 -mfloat-abi softfp \ |
15 // RUN: -disable-O0-optnone -emit-llvm -o - %s \ | 16 // RUN: -disable-O0-optnone -emit-llvm -fno-legacy-pass-manager -o - %s \ |
16 // RUN: | opt -S -mem2reg -instcombine \ | 17 // RUN: | opt -S -mem2reg \ |
17 // RUN: | FileCheck --check-prefixes=CHECK,CHECK-A32-SOFTFP %s | 18 // RUN: | FileCheck --check-prefixes=CHECK,CHECK-A32-SOFTFP %s |
18 | 19 |
20 // REQUIRES: arm-registered-target | |
21 // REQUIRES: aarch64-registered-target | |
22 | |
19 #include <arm_neon.h> | 23 #include <arm_neon.h> |
20 | 24 |
21 // CHECK-LABEL: test_vcvt_f32_bf16 | 25 // CHECK-A64-LABEL: @test_vcvt_f32_bf16( |
22 // CHECK: %[[EXT:.*]] = zext <4 x i16> %{{.*}} to <4 x i32> | 26 // CHECK-A64-NEXT: entry: |
23 // CHECK: shl nuw <4 x i32> %[[EXT]], <i32 16, i32 16, i32 16, i32 16> | 27 // CHECK-A64-NEXT: [[__REINT_150_I:%.*]] = alloca <4 x bfloat>, align 8 |
28 // CHECK-A64-NEXT: [[__REINT1_150_I:%.*]] = alloca <4 x i32>, align 16 | |
29 // CHECK-A64-NEXT: store <4 x bfloat> [[A:%.*]], <4 x bfloat>* [[__REINT_150_I]], align 8 | |
30 // CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat>* [[__REINT_150_I]] to <4 x i16>* | |
31 // CHECK-A64-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8 | |
32 // CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8> | |
33 // CHECK-A64-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> | |
34 // CHECK-A64-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP3]], <i32 16, i32 16, i32 16, i32 16> | |
35 // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I]], <4 x i32>* [[__REINT1_150_I]], align 16 | |
36 // CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <4 x i32>* [[__REINT1_150_I]] to <4 x float>* | |
37 // CHECK-A64-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 16 | |
38 // CHECK-A64-NEXT: ret <4 x float> [[TMP5]] | |
39 // | |
40 // CHECK-A32-HARDFP-LABEL: @test_vcvt_f32_bf16( | |
41 // CHECK-A32-HARDFP-NEXT: entry: | |
42 // CHECK-A32-HARDFP-NEXT: [[__REINT_150_I:%.*]] = alloca <4 x bfloat>, align 8 | |
43 // CHECK-A32-HARDFP-NEXT: [[__REINT1_150_I:%.*]] = alloca <4 x i32>, align 8 | |
44 // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[A:%.*]], <4 x bfloat>* [[__REINT_150_I]], align 8 | |
45 // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat>* [[__REINT_150_I]] to <4 x i16>* | |
46 // CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8 | |
47 // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8> | |
48 // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> | |
49 // CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP3]], <i32 16, i32 16, i32 16, i32 16> | |
50 // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I]], <4 x i32>* [[__REINT1_150_I]], align 8 | |
51 // CHECK-A32-HARDFP-NEXT: [[TMP4:%.*]] = bitcast <4 x i32>* [[__REINT1_150_I]] to <4 x float>* | |
52 // CHECK-A32-HARDFP-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 8 | |
53 // CHECK-A32-HARDFP-NEXT: ret <4 x float> [[TMP5]] | |
54 // | |
55 // CHECK-A32-SOFTFP-LABEL: @test_vcvt_f32_bf16( | |
56 // CHECK-A32-SOFTFP-NEXT: entry: | |
57 // CHECK-A32-SOFTFP-NEXT: [[__P0_150_I:%.*]] = alloca <4 x bfloat>, align 8 | |
58 // CHECK-A32-SOFTFP-NEXT: [[__REINT_150_I:%.*]] = alloca <4 x bfloat>, align 8 | |
59 // CHECK-A32-SOFTFP-NEXT: [[__REINT1_150_I:%.*]] = alloca <4 x i32>, align 8 | |
60 // CHECK-A32-SOFTFP-NEXT: [[A:%.*]] = alloca <4 x bfloat>, align 8 | |
61 // CHECK-A32-SOFTFP-NEXT: [[COERCE:%.*]] = alloca <4 x bfloat>, align 8 | |
62 // CHECK-A32-SOFTFP-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat>* [[A]] to <2 x i32>* | |
63 // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[A_COERCE:%.*]], <2 x i32>* [[TMP0]], align 8 | |
64 // CHECK-A32-SOFTFP-NEXT: [[A1:%.*]] = load <4 x bfloat>, <4 x bfloat>* [[A]], align 8 | |
65 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[A1]], <4 x bfloat>* [[COERCE]], align 8 | |
66 // CHECK-A32-SOFTFP-NEXT: [[TMP1:%.*]] = bitcast <4 x bfloat>* [[COERCE]] to <2 x i32>* | |
67 // CHECK-A32-SOFTFP-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 8 | |
68 // CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = bitcast <4 x bfloat>* [[__P0_150_I]] to <2 x i32>* | |
69 // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP2]], <2 x i32>* [[TMP3]], align 8 | |
70 // CHECK-A32-SOFTFP-NEXT: [[__P0_1501_I:%.*]] = load <4 x bfloat>, <4 x bfloat>* [[__P0_150_I]], align 8 | |
71 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_1501_I]], <4 x bfloat>* [[__REINT_150_I]], align 8 | |
72 // CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = bitcast <4 x bfloat>* [[__REINT_150_I]] to <4 x i16>* | |
73 // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 8 | |
74 // CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> | |
75 // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32> | |
76 // CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP7]], <i32 16, i32 16, i32 16, i32 16> | |
77 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I]], <4 x i32>* [[__REINT1_150_I]], align 8 | |
78 // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = bitcast <4 x i32>* [[__REINT1_150_I]] to <4 x float>* | |
79 // CHECK-A32-SOFTFP-NEXT: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[TMP8]], align 8 | |
80 // CHECK-A32-SOFTFP-NEXT: ret <4 x float> [[TMP9]] | |
81 // | |
24 float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) { | 82 float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) { |
25 return vcvt_f32_bf16(a); | 83 return vcvt_f32_bf16(a); |
26 } | 84 } |
27 | 85 |
28 // CHECK-LABEL: test_vcvtq_low_f32_bf16 | 86 // CHECK-A64-LABEL: @test_vcvtq_low_f32_bf16( |
29 // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 87 // CHECK-A64-NEXT: entry: |
30 // CHECK: %[[EXT:.*]] = zext <4 x i16> %{{.*}} to <4 x i32> | 88 // CHECK-A64-NEXT: [[__REINT_150_I_I:%.*]] = alloca <4 x bfloat>, align 8 |
31 // CHECK: shl nuw <4 x i32> %[[EXT]], <i32 16, i32 16, i32 16, i32 16> | 89 // CHECK-A64-NEXT: [[__REINT1_150_I_I:%.*]] = alloca <4 x i32>, align 16 |
90 // CHECK-A64-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> | |
91 // CHECK-A64-NEXT: store <4 x bfloat> [[SHUFFLE_I]], <4 x bfloat>* [[__REINT_150_I_I]], align 8 | |
92 // CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat>* [[__REINT_150_I_I]] to <4 x i16>* | |
93 // CHECK-A64-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8 | |
94 // CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8> | |
95 // CHECK-A64-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> | |
96 // CHECK-A64-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP3]], <i32 16, i32 16, i32 16, i32 16> | |
97 // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I_I]], <4 x i32>* [[__REINT1_150_I_I]], align 16 | |
98 // CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <4 x i32>* [[__REINT1_150_I_I]] to <4 x float>* | |
99 // CHECK-A64-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 16 | |
100 // CHECK-A64-NEXT: ret <4 x float> [[TMP5]] | |
101 // | |
102 // CHECK-A32-HARDFP-LABEL: @test_vcvtq_low_f32_bf16( | |
103 // CHECK-A32-HARDFP-NEXT: entry: | |
104 // CHECK-A32-HARDFP-NEXT: [[__REINT_150_I_I:%.*]] = alloca <4 x bfloat>, align 8 | |
105 // CHECK-A32-HARDFP-NEXT: [[__REINT1_150_I_I:%.*]] = alloca <4 x i32>, align 8 | |
106 // CHECK-A32-HARDFP-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> | |
107 // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[SHUFFLE_I]], <4 x bfloat>* [[__REINT_150_I_I]], align 8 | |
108 // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat>* [[__REINT_150_I_I]] to <4 x i16>* | |
109 // CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8 | |
110 // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8> | |
111 // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> | |
112 // CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP3]], <i32 16, i32 16, i32 16, i32 16> | |
113 // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], <4 x i32>* [[__REINT1_150_I_I]], align 8 | |
114 // CHECK-A32-HARDFP-NEXT: [[TMP4:%.*]] = bitcast <4 x i32>* [[__REINT1_150_I_I]] to <4 x float>* | |
115 // CHECK-A32-HARDFP-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 8 | |
116 // CHECK-A32-HARDFP-NEXT: ret <4 x float> [[TMP5]] | |
117 // | |
118 // CHECK-A32-SOFTFP-LABEL: @test_vcvtq_low_f32_bf16( | |
119 // CHECK-A32-SOFTFP-NEXT: entry: | |
120 // CHECK-A32-SOFTFP-NEXT: [[RETVAL_I:%.*]] = alloca <4 x bfloat>, align 8 | |
121 // CHECK-A32-SOFTFP-NEXT: [[__P0_I2:%.*]] = alloca <8 x bfloat>, align 8 | |
122 // CHECK-A32-SOFTFP-NEXT: [[__P0_150_I_I:%.*]] = alloca <4 x bfloat>, align 8 | |
123 // CHECK-A32-SOFTFP-NEXT: [[__REINT_150_I_I:%.*]] = alloca <4 x bfloat>, align 8 | |
124 // CHECK-A32-SOFTFP-NEXT: [[__REINT1_150_I_I:%.*]] = alloca <4 x i32>, align 8 | |
125 // CHECK-A32-SOFTFP-NEXT: [[__P0_I:%.*]] = alloca <8 x bfloat>, align 8 | |
126 // CHECK-A32-SOFTFP-NEXT: [[COERCE_I:%.*]] = alloca <8 x bfloat>, align 8 | |
127 // CHECK-A32-SOFTFP-NEXT: [[COERCE2_I:%.*]] = alloca <4 x bfloat>, align 8 | |
128 // CHECK-A32-SOFTFP-NEXT: [[COERCE3_I:%.*]] = alloca <4 x bfloat>, align 8 | |
129 // CHECK-A32-SOFTFP-NEXT: [[A:%.*]] = alloca <8 x bfloat>, align 8 | |
130 // CHECK-A32-SOFTFP-NEXT: [[COERCE:%.*]] = alloca <8 x bfloat>, align 8 | |
131 // CHECK-A32-SOFTFP-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat>* [[A]] to <4 x i32>* | |
132 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[A_COERCE:%.*]], <4 x i32>* [[TMP0]], align 8 | |
133 // CHECK-A32-SOFTFP-NEXT: [[A1:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[A]], align 8 | |
134 // CHECK-A32-SOFTFP-NEXT: store <8 x bfloat> [[A1]], <8 x bfloat>* [[COERCE]], align 8 | |
135 // CHECK-A32-SOFTFP-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat>* [[COERCE]] to <4 x i32>* | |
136 // CHECK-A32-SOFTFP-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 8 | |
137 // CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = bitcast <8 x bfloat>* [[__P0_I]] to <4 x i32>* | |
138 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 8 | |
139 // CHECK-A32-SOFTFP-NEXT: [[__P01_I:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[__P0_I]], align 8 | |
140 // CHECK-A32-SOFTFP-NEXT: store <8 x bfloat> [[__P01_I]], <8 x bfloat>* [[COERCE_I]], align 8 | |
141 // CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = bitcast <8 x bfloat>* [[COERCE_I]] to <4 x i32>* | |
142 // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 8 | |
143 // CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <8 x bfloat>* [[__P0_I2]] to <4 x i32>* | |
144 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 8 | |
145 // CHECK-A32-SOFTFP-NEXT: [[__P01_I5:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[__P0_I2]], align 8 | |
146 // CHECK-A32-SOFTFP-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[__P01_I5]], <8 x bfloat> [[__P01_I5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> | |
147 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[SHUFFLE_I]], <4 x bfloat>* [[RETVAL_I]], align 8 | |
148 // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = bitcast <4 x bfloat>* [[RETVAL_I]] to <2 x i32>* | |
149 // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[TMP7]], align 8 | |
150 // CHECK-A32-SOFTFP-NEXT: [[TMP9:%.*]] = bitcast <4 x bfloat>* [[COERCE2_I]] to <2 x i32>* | |
151 // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP8]], <2 x i32>* [[TMP9]], align 8 | |
152 // CHECK-A32-SOFTFP-NEXT: [[TMP10:%.*]] = load <4 x bfloat>, <4 x bfloat>* [[COERCE2_I]], align 8 | |
153 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[TMP10]], <4 x bfloat>* [[COERCE3_I]], align 8 | |
154 // CHECK-A32-SOFTFP-NEXT: [[TMP11:%.*]] = bitcast <4 x bfloat>* [[COERCE3_I]] to <2 x i32>* | |
155 // CHECK-A32-SOFTFP-NEXT: [[TMP12:%.*]] = load <2 x i32>, <2 x i32>* [[TMP11]], align 8 | |
156 // CHECK-A32-SOFTFP-NEXT: [[TMP13:%.*]] = bitcast <4 x bfloat>* [[__P0_150_I_I]] to <2 x i32>* | |
157 // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP12]], <2 x i32>* [[TMP13]], align 8 | |
158 // CHECK-A32-SOFTFP-NEXT: [[__P0_1501_I_I:%.*]] = load <4 x bfloat>, <4 x bfloat>* [[__P0_150_I_I]], align 8 | |
159 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_1501_I_I]], <4 x bfloat>* [[__REINT_150_I_I]], align 8 | |
160 // CHECK-A32-SOFTFP-NEXT: [[TMP14:%.*]] = bitcast <4 x bfloat>* [[__REINT_150_I_I]] to <4 x i16>* | |
161 // CHECK-A32-SOFTFP-NEXT: [[TMP15:%.*]] = load <4 x i16>, <4 x i16>* [[TMP14]], align 8 | |
162 // CHECK-A32-SOFTFP-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP15]] to <8 x i8> | |
163 // CHECK-A32-SOFTFP-NEXT: [[TMP17:%.*]] = sext <4 x i16> [[TMP15]] to <4 x i32> | |
164 // CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP17]], <i32 16, i32 16, i32 16, i32 16> | |
165 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], <4 x i32>* [[__REINT1_150_I_I]], align 8 | |
166 // CHECK-A32-SOFTFP-NEXT: [[TMP18:%.*]] = bitcast <4 x i32>* [[__REINT1_150_I_I]] to <4 x float>* | |
167 // CHECK-A32-SOFTFP-NEXT: [[TMP19:%.*]] = load <4 x float>, <4 x float>* [[TMP18]], align 8 | |
168 // CHECK-A32-SOFTFP-NEXT: ret <4 x float> [[TMP19]] | |
169 // | |
32 float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) { | 170 float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) { |
33 return vcvtq_low_f32_bf16(a); | 171 return vcvtq_low_f32_bf16(a); |
34 } | 172 } |
35 | 173 |
36 // CHECK-LABEL: test_vcvtq_high_f32_bf16 | 174 // CHECK-A64-LABEL: @test_vcvtq_high_f32_bf16( |
37 // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | 175 // CHECK-A64-NEXT: entry: |
38 // CHECK: %[[EXT:.*]] = zext <4 x i16> %{{.*}} to <4 x i32> | 176 // CHECK-A64-NEXT: [[__REINT_150_I_I:%.*]] = alloca <4 x bfloat>, align 8 |
39 // CHECK: shl nuw <4 x i32> %[[EXT]], <i32 16, i32 16, i32 16, i32 16> | 177 // CHECK-A64-NEXT: [[__REINT1_150_I_I:%.*]] = alloca <4 x i32>, align 16 |
178 // CHECK-A64-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> | |
179 // CHECK-A64-NEXT: store <4 x bfloat> [[SHUFFLE_I]], <4 x bfloat>* [[__REINT_150_I_I]], align 8 | |
180 // CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat>* [[__REINT_150_I_I]] to <4 x i16>* | |
181 // CHECK-A64-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8 | |
182 // CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8> | |
183 // CHECK-A64-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> | |
184 // CHECK-A64-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP3]], <i32 16, i32 16, i32 16, i32 16> | |
185 // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I_I]], <4 x i32>* [[__REINT1_150_I_I]], align 16 | |
186 // CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <4 x i32>* [[__REINT1_150_I_I]] to <4 x float>* | |
187 // CHECK-A64-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 16 | |
188 // CHECK-A64-NEXT: ret <4 x float> [[TMP5]] | |
189 // | |
190 // CHECK-A32-HARDFP-LABEL: @test_vcvtq_high_f32_bf16( | |
191 // CHECK-A32-HARDFP-NEXT: entry: | |
192 // CHECK-A32-HARDFP-NEXT: [[__REINT_150_I_I:%.*]] = alloca <4 x bfloat>, align 8 | |
193 // CHECK-A32-HARDFP-NEXT: [[__REINT1_150_I_I:%.*]] = alloca <4 x i32>, align 8 | |
194 // CHECK-A32-HARDFP-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> | |
195 // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[SHUFFLE_I]], <4 x bfloat>* [[__REINT_150_I_I]], align 8 | |
196 // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat>* [[__REINT_150_I_I]] to <4 x i16>* | |
197 // CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8 | |
198 // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8> | |
199 // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> | |
200 // CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP3]], <i32 16, i32 16, i32 16, i32 16> | |
201 // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], <4 x i32>* [[__REINT1_150_I_I]], align 8 | |
202 // CHECK-A32-HARDFP-NEXT: [[TMP4:%.*]] = bitcast <4 x i32>* [[__REINT1_150_I_I]] to <4 x float>* | |
203 // CHECK-A32-HARDFP-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 8 | |
204 // CHECK-A32-HARDFP-NEXT: ret <4 x float> [[TMP5]] | |
205 // | |
206 // CHECK-A32-SOFTFP-LABEL: @test_vcvtq_high_f32_bf16( | |
207 // CHECK-A32-SOFTFP-NEXT: entry: | |
208 // CHECK-A32-SOFTFP-NEXT: [[RETVAL_I:%.*]] = alloca <4 x bfloat>, align 8 | |
209 // CHECK-A32-SOFTFP-NEXT: [[__P0_I2:%.*]] = alloca <8 x bfloat>, align 8 | |
210 // CHECK-A32-SOFTFP-NEXT: [[__P0_150_I_I:%.*]] = alloca <4 x bfloat>, align 8 | |
211 // CHECK-A32-SOFTFP-NEXT: [[__REINT_150_I_I:%.*]] = alloca <4 x bfloat>, align 8 | |
212 // CHECK-A32-SOFTFP-NEXT: [[__REINT1_150_I_I:%.*]] = alloca <4 x i32>, align 8 | |
213 // CHECK-A32-SOFTFP-NEXT: [[__P0_I:%.*]] = alloca <8 x bfloat>, align 8 | |
214 // CHECK-A32-SOFTFP-NEXT: [[COERCE_I:%.*]] = alloca <8 x bfloat>, align 8 | |
215 // CHECK-A32-SOFTFP-NEXT: [[COERCE2_I:%.*]] = alloca <4 x bfloat>, align 8 | |
216 // CHECK-A32-SOFTFP-NEXT: [[COERCE3_I:%.*]] = alloca <4 x bfloat>, align 8 | |
217 // CHECK-A32-SOFTFP-NEXT: [[A:%.*]] = alloca <8 x bfloat>, align 8 | |
218 // CHECK-A32-SOFTFP-NEXT: [[COERCE:%.*]] = alloca <8 x bfloat>, align 8 | |
219 // CHECK-A32-SOFTFP-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat>* [[A]] to <4 x i32>* | |
220 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[A_COERCE:%.*]], <4 x i32>* [[TMP0]], align 8 | |
221 // CHECK-A32-SOFTFP-NEXT: [[A1:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[A]], align 8 | |
222 // CHECK-A32-SOFTFP-NEXT: store <8 x bfloat> [[A1]], <8 x bfloat>* [[COERCE]], align 8 | |
223 // CHECK-A32-SOFTFP-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat>* [[COERCE]] to <4 x i32>* | |
224 // CHECK-A32-SOFTFP-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 8 | |
225 // CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = bitcast <8 x bfloat>* [[__P0_I]] to <4 x i32>* | |
226 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 8 | |
227 // CHECK-A32-SOFTFP-NEXT: [[__P01_I:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[__P0_I]], align 8 | |
228 // CHECK-A32-SOFTFP-NEXT: store <8 x bfloat> [[__P01_I]], <8 x bfloat>* [[COERCE_I]], align 8 | |
229 // CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = bitcast <8 x bfloat>* [[COERCE_I]] to <4 x i32>* | |
230 // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 8 | |
231 // CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <8 x bfloat>* [[__P0_I2]] to <4 x i32>* | |
232 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 8 | |
233 // CHECK-A32-SOFTFP-NEXT: [[__P01_I5:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[__P0_I2]], align 8 | |
234 // CHECK-A32-SOFTFP-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[__P01_I5]], <8 x bfloat> [[__P01_I5]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> | |
235 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[SHUFFLE_I]], <4 x bfloat>* [[RETVAL_I]], align 8 | |
236 // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = bitcast <4 x bfloat>* [[RETVAL_I]] to <2 x i32>* | |
237 // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[TMP7]], align 8 | |
238 // CHECK-A32-SOFTFP-NEXT: [[TMP9:%.*]] = bitcast <4 x bfloat>* [[COERCE2_I]] to <2 x i32>* | |
239 // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP8]], <2 x i32>* [[TMP9]], align 8 | |
240 // CHECK-A32-SOFTFP-NEXT: [[TMP10:%.*]] = load <4 x bfloat>, <4 x bfloat>* [[COERCE2_I]], align 8 | |
241 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[TMP10]], <4 x bfloat>* [[COERCE3_I]], align 8 | |
242 // CHECK-A32-SOFTFP-NEXT: [[TMP11:%.*]] = bitcast <4 x bfloat>* [[COERCE3_I]] to <2 x i32>* | |
243 // CHECK-A32-SOFTFP-NEXT: [[TMP12:%.*]] = load <2 x i32>, <2 x i32>* [[TMP11]], align 8 | |
244 // CHECK-A32-SOFTFP-NEXT: [[TMP13:%.*]] = bitcast <4 x bfloat>* [[__P0_150_I_I]] to <2 x i32>* | |
245 // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP12]], <2 x i32>* [[TMP13]], align 8 | |
246 // CHECK-A32-SOFTFP-NEXT: [[__P0_1501_I_I:%.*]] = load <4 x bfloat>, <4 x bfloat>* [[__P0_150_I_I]], align 8 | |
247 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_1501_I_I]], <4 x bfloat>* [[__REINT_150_I_I]], align 8 | |
248 // CHECK-A32-SOFTFP-NEXT: [[TMP14:%.*]] = bitcast <4 x bfloat>* [[__REINT_150_I_I]] to <4 x i16>* | |
249 // CHECK-A32-SOFTFP-NEXT: [[TMP15:%.*]] = load <4 x i16>, <4 x i16>* [[TMP14]], align 8 | |
250 // CHECK-A32-SOFTFP-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP15]] to <8 x i8> | |
251 // CHECK-A32-SOFTFP-NEXT: [[TMP17:%.*]] = sext <4 x i16> [[TMP15]] to <4 x i32> | |
252 // CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP17]], <i32 16, i32 16, i32 16, i32 16> | |
253 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], <4 x i32>* [[__REINT1_150_I_I]], align 8 | |
254 // CHECK-A32-SOFTFP-NEXT: [[TMP18:%.*]] = bitcast <4 x i32>* [[__REINT1_150_I_I]] to <4 x float>* | |
255 // CHECK-A32-SOFTFP-NEXT: [[TMP19:%.*]] = load <4 x float>, <4 x float>* [[TMP18]], align 8 | |
256 // CHECK-A32-SOFTFP-NEXT: ret <4 x float> [[TMP19]] | |
257 // | |
40 float32x4_t test_vcvtq_high_f32_bf16(bfloat16x8_t a) { | 258 float32x4_t test_vcvtq_high_f32_bf16(bfloat16x8_t a) { |
41 return vcvtq_high_f32_bf16(a); | 259 return vcvtq_high_f32_bf16(a); |
42 } | 260 } |
43 | 261 |
44 // CHECK-LABEL: test_vcvt_bf16_f32 | 262 // CHECK-A64-LABEL: @test_vcvt_bf16_f32( |
45 // CHECK-A64: %[[CVT:.*]] = call <8 x bfloat> @llvm.aarch64.neon.bfcvtn(<4 x float> %a) | 263 // CHECK-A64-NEXT: entry: |
46 // CHECK-A64: shufflevector <8 x bfloat> %[[CVT]], <8 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 264 // CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> |
47 // CHECK-A32-HARDFP: call <4 x bfloat> @llvm.arm.neon.vcvtfp2bf.v4bf16(<4 x float> %a) | 265 // CHECK-A64-NEXT: [[__A64_VCVTQ_LOW_BF16_V1_I:%.*]] = call <8 x bfloat> @llvm.aarch64.neon.bfcvtn(<4 x float> [[A]]) #[[ATTR3:[0-9]+]] |
48 // CHECK-A32-SOFTFP: call <4 x i16> @llvm.arm.neon.vcvtfp2bf.v4i16(<4 x float> %a) | 266 // CHECK-A64-NEXT: [[__A64_VCVTQ_LOW_BF16_V2_I:%.*]] = bitcast <8 x bfloat> [[__A64_VCVTQ_LOW_BF16_V1_I]] to <16 x i8> |
267 // CHECK-A64-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[__A64_VCVTQ_LOW_BF16_V1_I]], <8 x bfloat> [[__A64_VCVTQ_LOW_BF16_V1_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> | |
268 // CHECK-A64-NEXT: ret <4 x bfloat> [[SHUFFLE_I]] | |
269 // | |
270 // CHECK-A32-HARDFP-LABEL: @test_vcvt_bf16_f32( | |
271 // CHECK-A32-HARDFP-NEXT: entry: | |
272 // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> | |
273 // CHECK-A32-HARDFP-NEXT: [[VCVTFP2BF1_I:%.*]] = call <4 x bfloat> @llvm.arm.neon.vcvtfp2bf.v4bf16(<4 x float> [[A]]) #[[ATTR3:[0-9]+]] | |
274 // CHECK-A32-HARDFP-NEXT: ret <4 x bfloat> [[VCVTFP2BF1_I]] | |
275 // | |
276 // CHECK-A32-SOFTFP-LABEL: @test_vcvt_bf16_f32( | |
277 // CHECK-A32-SOFTFP-NEXT: entry: | |
278 // CHECK-A32-SOFTFP-NEXT: [[RETVAL_I1:%.*]] = alloca <4 x bfloat>, align 8 | |
279 // CHECK-A32-SOFTFP-NEXT: [[RETVAL_I:%.*]] = alloca <4 x bfloat>, align 8 | |
280 // CHECK-A32-SOFTFP-NEXT: [[COERCE_I:%.*]] = alloca <4 x bfloat>, align 8 | |
281 // CHECK-A32-SOFTFP-NEXT: [[RETVAL:%.*]] = alloca <4 x bfloat>, align 8 | |
282 // CHECK-A32-SOFTFP-NEXT: [[COERCE:%.*]] = alloca <4 x bfloat>, align 8 | |
283 // CHECK-A32-SOFTFP-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> | |
284 // CHECK-A32-SOFTFP-NEXT: [[VCVTFP2BF1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2bf.v4i16(<4 x float> [[A]]) #[[ATTR3:[0-9]+]] | |
285 // CHECK-A32-SOFTFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VCVTFP2BF1_I]] to <4 x bfloat> | |
286 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[TMP1]], <4 x bfloat>* [[RETVAL_I1]], align 8 | |
287 // CHECK-A32-SOFTFP-NEXT: [[TMP2:%.*]] = bitcast <4 x bfloat>* [[RETVAL_I1]] to <2 x i32>* | |
288 // CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP2]], align 8 | |
289 // CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = bitcast <4 x bfloat>* [[COERCE_I]] to <2 x i32>* | |
290 // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP4]], align 8 | |
291 // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x bfloat>, <4 x bfloat>* [[COERCE_I]], align 8 | |
292 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[TMP5]], <4 x bfloat>* [[RETVAL_I]], align 8 | |
293 // CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <4 x bfloat>* [[RETVAL_I]] to <2 x i32>* | |
294 // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[TMP6]], align 8 | |
295 // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = bitcast <4 x bfloat>* [[COERCE]] to <2 x i32>* | |
296 // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP7]], <2 x i32>* [[TMP8]], align 8 | |
297 // CHECK-A32-SOFTFP-NEXT: [[TMP9:%.*]] = load <4 x bfloat>, <4 x bfloat>* [[COERCE]], align 8 | |
298 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[TMP9]], <4 x bfloat>* [[RETVAL]], align 8 | |
299 // CHECK-A32-SOFTFP-NEXT: [[TMP10:%.*]] = bitcast <4 x bfloat>* [[RETVAL]] to <2 x i32>* | |
300 // CHECK-A32-SOFTFP-NEXT: [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[TMP10]], align 8 | |
301 // CHECK-A32-SOFTFP-NEXT: ret <2 x i32> [[TMP11]] | |
302 // | |
49 bfloat16x4_t test_vcvt_bf16_f32(float32x4_t a) { | 303 bfloat16x4_t test_vcvt_bf16_f32(float32x4_t a) { |
50 return vcvt_bf16_f32(a); | 304 return vcvt_bf16_f32(a); |
51 } | 305 } |
52 | 306 |
53 // CHECK-LABEL: test_vcvtq_low_bf16_f32 | 307 // CHECK-A64-LABEL: @test_vcvtq_low_bf16_f32( |
54 // CHECK-A64: call <8 x bfloat> @llvm.aarch64.neon.bfcvtn(<4 x float> %a) | 308 // CHECK-A64-NEXT: entry: |
55 // CHECK-A32-HARDFP: %[[CVT:.*]] = call <4 x bfloat> @llvm.arm.neon.vcvtfp2bf.v4bf16 | 309 // CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> |
56 // CHECK-A32-HARDFP: shufflevector <4 x bfloat> zeroinitializer, <4 x bfloat> %[[CVT]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | 310 // CHECK-A64-NEXT: [[__A64_VCVTQ_LOW_BF16_V1_I:%.*]] = call <8 x bfloat> @llvm.aarch64.neon.bfcvtn(<4 x float> [[A]]) #[[ATTR3]] |
57 // CHECK-A32-SOFTFP: call <4 x i16> @llvm.arm.neon.vcvtfp2bf.v4i16 | 311 // CHECK-A64-NEXT: [[__A64_VCVTQ_LOW_BF16_V2_I:%.*]] = bitcast <8 x bfloat> [[__A64_VCVTQ_LOW_BF16_V1_I]] to <16 x i8> |
58 // CHECK-A32-SOFTFP: shufflevector <4 x bfloat> zeroinitializer, <4 x bfloat> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | 312 // CHECK-A64-NEXT: ret <8 x bfloat> [[__A64_VCVTQ_LOW_BF16_V1_I]] |
313 // | |
314 // CHECK-A32-HARDFP-LABEL: @test_vcvtq_low_bf16_f32( | |
315 // CHECK-A32-HARDFP-NEXT: entry: | |
316 // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> | |
317 // CHECK-A32-HARDFP-NEXT: [[VCVTFP2BF1_I:%.*]] = call <4 x bfloat> @llvm.arm.neon.vcvtfp2bf.v4bf16(<4 x float> [[A]]) #[[ATTR3]] | |
318 // CHECK-A32-HARDFP-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x bfloat> zeroinitializer, <4 x bfloat> [[VCVTFP2BF1_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | |
319 // CHECK-A32-HARDFP-NEXT: ret <8 x bfloat> [[SHUFFLE_I]] | |
320 // | |
321 // CHECK-A32-SOFTFP-LABEL: @test_vcvtq_low_bf16_f32( | |
322 // CHECK-A32-SOFTFP-NEXT: entry: | |
323 // CHECK-A32-SOFTFP-NEXT: [[RETVAL_I4:%.*]] = alloca <8 x bfloat>, align 8 | |
324 // CHECK-A32-SOFTFP-NEXT: [[__P0_I:%.*]] = alloca <4 x bfloat>, align 8 | |
325 // CHECK-A32-SOFTFP-NEXT: [[__P1_I:%.*]] = alloca <4 x bfloat>, align 8 | |
326 // CHECK-A32-SOFTFP-NEXT: [[RETVAL_I1:%.*]] = alloca <4 x bfloat>, align 8 | |
327 // CHECK-A32-SOFTFP-NEXT: [[RETVAL_I:%.*]] = alloca <8 x bfloat>, align 8 | |
328 // CHECK-A32-SOFTFP-NEXT: [[COERCE_I:%.*]] = alloca <4 x bfloat>, align 8 | |
329 // CHECK-A32-SOFTFP-NEXT: [[COERCE1_I:%.*]] = alloca <4 x bfloat>, align 8 | |
330 // CHECK-A32-SOFTFP-NEXT: [[COERCE2_I:%.*]] = alloca <4 x bfloat>, align 8 | |
331 // CHECK-A32-SOFTFP-NEXT: [[COERCE4_I:%.*]] = alloca <8 x bfloat>, align 8 | |
332 // CHECK-A32-SOFTFP-NEXT: [[RETVAL:%.*]] = alloca <8 x bfloat>, align 8 | |
333 // CHECK-A32-SOFTFP-NEXT: [[COERCE:%.*]] = alloca <8 x bfloat>, align 8 | |
334 // CHECK-A32-SOFTFP-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> | |
335 // CHECK-A32-SOFTFP-NEXT: [[VCVTFP2BF1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2bf.v4i16(<4 x float> [[A]]) #[[ATTR3]] | |
336 // CHECK-A32-SOFTFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VCVTFP2BF1_I]] to <4 x bfloat> | |
337 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[TMP1]], <4 x bfloat>* [[RETVAL_I1]], align 8 | |
338 // CHECK-A32-SOFTFP-NEXT: [[TMP2:%.*]] = bitcast <4 x bfloat>* [[RETVAL_I1]] to <2 x i32>* | |
339 // CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP2]], align 8 | |
340 // CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = bitcast <4 x bfloat>* [[COERCE_I]] to <2 x i32>* | |
341 // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP4]], align 8 | |
342 // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x bfloat>, <4 x bfloat>* [[COERCE_I]], align 8 | |
343 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> zeroinitializer, <4 x bfloat>* [[COERCE1_I]], align 8 | |
344 // CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <4 x bfloat>* [[COERCE1_I]] to <2 x i32>* | |
345 // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[TMP6]], align 8 | |
346 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[TMP5]], <4 x bfloat>* [[COERCE2_I]], align 8 | |
347 // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = bitcast <4 x bfloat>* [[COERCE2_I]] to <2 x i32>* | |
348 // CHECK-A32-SOFTFP-NEXT: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[TMP8]], align 8 | |
349 // CHECK-A32-SOFTFP-NEXT: [[TMP10:%.*]] = bitcast <4 x bfloat>* [[__P0_I]] to <2 x i32>* | |
350 // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP7]], <2 x i32>* [[TMP10]], align 8 | |
351 // CHECK-A32-SOFTFP-NEXT: [[__P01_I:%.*]] = load <4 x bfloat>, <4 x bfloat>* [[__P0_I]], align 8 | |
352 // CHECK-A32-SOFTFP-NEXT: [[TMP11:%.*]] = bitcast <4 x bfloat>* [[__P1_I]] to <2 x i32>* | |
353 // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP9]], <2 x i32>* [[TMP11]], align 8 | |
354 // CHECK-A32-SOFTFP-NEXT: [[__P12_I:%.*]] = load <4 x bfloat>, <4 x bfloat>* [[__P1_I]], align 8 | |
355 // CHECK-A32-SOFTFP-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x bfloat> [[__P01_I]], <4 x bfloat> [[__P12_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | |
356 // CHECK-A32-SOFTFP-NEXT: store <8 x bfloat> [[SHUFFLE_I]], <8 x bfloat>* [[RETVAL_I4]], align 8 | |
357 // CHECK-A32-SOFTFP-NEXT: [[TMP12:%.*]] = bitcast <8 x bfloat>* [[RETVAL_I4]] to <4 x i32>* | |
358 // CHECK-A32-SOFTFP-NEXT: [[TMP13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 8 | |
359 // CHECK-A32-SOFTFP-NEXT: [[TMP14:%.*]] = bitcast <8 x bfloat>* [[COERCE4_I]] to <4 x i32>* | |
360 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[TMP14]], align 8 | |
361 // CHECK-A32-SOFTFP-NEXT: [[TMP15:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[COERCE4_I]], align 8 | |
362 // CHECK-A32-SOFTFP-NEXT: store <8 x bfloat> [[TMP15]], <8 x bfloat>* [[RETVAL_I]], align 8 | |
363 // CHECK-A32-SOFTFP-NEXT: [[TMP16:%.*]] = bitcast <8 x bfloat>* [[RETVAL_I]] to <4 x i32>* | |
364 // CHECK-A32-SOFTFP-NEXT: [[TMP17:%.*]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 8 | |
365 // CHECK-A32-SOFTFP-NEXT: [[TMP18:%.*]] = bitcast <8 x bfloat>* [[COERCE]] to <4 x i32>* | |
366 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* [[TMP18]], align 8 | |
367 // CHECK-A32-SOFTFP-NEXT: [[TMP19:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[COERCE]], align 8 | |
368 // CHECK-A32-SOFTFP-NEXT: store <8 x bfloat> [[TMP19]], <8 x bfloat>* [[RETVAL]], align 8 | |
369 // CHECK-A32-SOFTFP-NEXT: [[TMP20:%.*]] = bitcast <8 x bfloat>* [[RETVAL]] to <4 x i32>* | |
370 // CHECK-A32-SOFTFP-NEXT: [[TMP21:%.*]] = load <4 x i32>, <4 x i32>* [[TMP20]], align 8 | |
371 // CHECK-A32-SOFTFP-NEXT: ret <4 x i32> [[TMP21]] | |
372 // | |
59 bfloat16x8_t test_vcvtq_low_bf16_f32(float32x4_t a) { | 373 bfloat16x8_t test_vcvtq_low_bf16_f32(float32x4_t a) { |
60 return vcvtq_low_bf16_f32(a); | 374 return vcvtq_low_bf16_f32(a); |
61 } | 375 } |
62 | 376 |
63 // CHECK-LABEL: test_vcvtq_high_bf16_f32 | 377 // CHECK-A64-LABEL: @test_vcvtq_high_bf16_f32( |
64 // CHECK-A64: call <8 x bfloat> @llvm.aarch64.neon.bfcvtn2(<8 x bfloat> %inactive, <4 x float> %a) | 378 // CHECK-A64-NEXT: entry: |
65 // CHECK-A32-HARDFP: %[[CVT:.*]] = call <4 x bfloat> @llvm.arm.neon.vcvtfp2bf.v4bf16(<4 x float> %a) | 379 // CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[INACTIVE:%.*]] to <16 x i8> |
66 // CHECK-A32-HARDFP: %[[INACT:.*]] = shufflevector <8 x bfloat> %inactive, <8 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 380 // CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> |
67 // CHECK-A32-HARDFP: shufflevector <4 x bfloat> %[[CVT]], <4 x bfloat> %[[INACT]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | 381 // CHECK-A64-NEXT: [[VCVTQ_HIGH_BF16_V2_I:%.*]] = call <8 x bfloat> @llvm.aarch64.neon.bfcvtn2(<8 x bfloat> [[INACTIVE]], <4 x float> [[A]]) #[[ATTR3]] |
68 // CHECK-A32-SOFTFP: call <4 x i16> @llvm.arm.neon.vcvtfp2bf.v4i16(<4 x float> %a) | 382 // CHECK-A64-NEXT: [[VCVTQ_HIGH_BF16_V3_I:%.*]] = bitcast <8 x bfloat> [[VCVTQ_HIGH_BF16_V2_I]] to <16 x i8> |
69 // CHECK-A32-SOFTFP: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 383 // CHECK-A64-NEXT: ret <8 x bfloat> [[VCVTQ_HIGH_BF16_V2_I]] |
70 // CHECK-A32-SOFTFP: shufflevector <4 x bfloat> %{{.*}}, <4 x bfloat> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | 384 // |
385 // CHECK-A32-HARDFP-LABEL: @test_vcvtq_high_bf16_f32( | |
386 // CHECK-A32-HARDFP-NEXT: entry: | |
387 // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> | |
388 // CHECK-A32-HARDFP-NEXT: [[VCVTFP2BF1_I:%.*]] = call <4 x bfloat> @llvm.arm.neon.vcvtfp2bf.v4bf16(<4 x float> [[A]]) #[[ATTR3]] | |
389 // CHECK-A32-HARDFP-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[INACTIVE:%.*]], <8 x bfloat> [[INACTIVE]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> | |
390 // CHECK-A32-HARDFP-NEXT: [[SHUFFLE_I8:%.*]] = shufflevector <4 x bfloat> [[VCVTFP2BF1_I]], <4 x bfloat> [[SHUFFLE_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | |
391 // CHECK-A32-HARDFP-NEXT: ret <8 x bfloat> [[SHUFFLE_I8]] | |
392 // | |
393 // CHECK-A32-SOFTFP-LABEL: @test_vcvtq_high_bf16_f32( | |
394 // CHECK-A32-SOFTFP-NEXT: entry: | |
395 // CHECK-A32-SOFTFP-NEXT: [[RETVAL_I11:%.*]] = alloca <8 x bfloat>, align 8 | |
396 // CHECK-A32-SOFTFP-NEXT: [[__P0_I12:%.*]] = alloca <4 x bfloat>, align 8 | |
397 // CHECK-A32-SOFTFP-NEXT: [[__P1_I:%.*]] = alloca <4 x bfloat>, align 8 | |
398 // CHECK-A32-SOFTFP-NEXT: [[RETVAL_I8:%.*]] = alloca <4 x bfloat>, align 8 | |
399 // CHECK-A32-SOFTFP-NEXT: [[RETVAL_I3:%.*]] = alloca <4 x bfloat>, align 8 | |
400 // CHECK-A32-SOFTFP-NEXT: [[__P0_I4:%.*]] = alloca <8 x bfloat>, align 8 | |
401 // CHECK-A32-SOFTFP-NEXT: [[RETVAL_I:%.*]] = alloca <8 x bfloat>, align 8 | |
402 // CHECK-A32-SOFTFP-NEXT: [[__P0_I:%.*]] = alloca <8 x bfloat>, align 8 | |
403 // CHECK-A32-SOFTFP-NEXT: [[COERCE_I:%.*]] = alloca <4 x bfloat>, align 8 | |
404 // CHECK-A32-SOFTFP-NEXT: [[COERCE2_I:%.*]] = alloca <8 x bfloat>, align 8 | |
405 // CHECK-A32-SOFTFP-NEXT: [[COERCE4_I:%.*]] = alloca <4 x bfloat>, align 8 | |
406 // CHECK-A32-SOFTFP-NEXT: [[COERCE5_I:%.*]] = alloca <4 x bfloat>, align 8 | |
407 // CHECK-A32-SOFTFP-NEXT: [[COERCE6_I:%.*]] = alloca <4 x bfloat>, align 8 | |
408 // CHECK-A32-SOFTFP-NEXT: [[COERCE8_I:%.*]] = alloca <8 x bfloat>, align 8 | |
409 // CHECK-A32-SOFTFP-NEXT: [[RETVAL:%.*]] = alloca <8 x bfloat>, align 8 | |
410 // CHECK-A32-SOFTFP-NEXT: [[INACTIVE:%.*]] = alloca <8 x bfloat>, align 8 | |
411 // CHECK-A32-SOFTFP-NEXT: [[COERCE:%.*]] = alloca <8 x bfloat>, align 8 | |
412 // CHECK-A32-SOFTFP-NEXT: [[COERCE2:%.*]] = alloca <8 x bfloat>, align 8 | |
413 // CHECK-A32-SOFTFP-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat>* [[INACTIVE]] to <4 x i32>* | |
414 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[INACTIVE_COERCE:%.*]], <4 x i32>* [[TMP0]], align 8 | |
415 // CHECK-A32-SOFTFP-NEXT: [[INACTIVE1:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[INACTIVE]], align 8 | |
416 // CHECK-A32-SOFTFP-NEXT: store <8 x bfloat> [[INACTIVE1]], <8 x bfloat>* [[COERCE]], align 8 | |
417 // CHECK-A32-SOFTFP-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat>* [[COERCE]] to <4 x i32>* | |
418 // CHECK-A32-SOFTFP-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 8 | |
419 // CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = bitcast <8 x bfloat>* [[__P0_I]] to <4 x i32>* | |
420 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 8 | |
421 // CHECK-A32-SOFTFP-NEXT: [[__P01_I:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[__P0_I]], align 8 | |
422 // CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> | |
423 // CHECK-A32-SOFTFP-NEXT: [[VCVTFP2BF1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2bf.v4i16(<4 x float> [[A]]) #[[ATTR3]] | |
424 // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[VCVTFP2BF1_I]] to <4 x bfloat> | |
425 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[TMP5]], <4 x bfloat>* [[RETVAL_I8]], align 8 | |
426 // CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <4 x bfloat>* [[RETVAL_I8]] to <2 x i32>* | |
427 // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[TMP6]], align 8 | |
428 // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = bitcast <4 x bfloat>* [[COERCE_I]] to <2 x i32>* | |
429 // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP7]], <2 x i32>* [[TMP8]], align 8 | |
430 // CHECK-A32-SOFTFP-NEXT: [[TMP9:%.*]] = load <4 x bfloat>, <4 x bfloat>* [[COERCE_I]], align 8 | |
431 // CHECK-A32-SOFTFP-NEXT: store <8 x bfloat> [[__P01_I]], <8 x bfloat>* [[COERCE2_I]], align 8 | |
432 // CHECK-A32-SOFTFP-NEXT: [[TMP10:%.*]] = bitcast <8 x bfloat>* [[COERCE2_I]] to <4 x i32>* | |
433 // CHECK-A32-SOFTFP-NEXT: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 8 | |
434 // CHECK-A32-SOFTFP-NEXT: [[TMP12:%.*]] = bitcast <8 x bfloat>* [[__P0_I4]] to <4 x i32>* | |
435 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* [[TMP12]], align 8 | |
436 // CHECK-A32-SOFTFP-NEXT: [[__P01_I7:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[__P0_I4]], align 8 | |
437 // CHECK-A32-SOFTFP-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[__P01_I7]], <8 x bfloat> [[__P01_I7]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> | |
438 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[SHUFFLE_I]], <4 x bfloat>* [[RETVAL_I3]], align 8 | |
439 // CHECK-A32-SOFTFP-NEXT: [[TMP13:%.*]] = bitcast <4 x bfloat>* [[RETVAL_I3]] to <2 x i32>* | |
440 // CHECK-A32-SOFTFP-NEXT: [[TMP14:%.*]] = load <2 x i32>, <2 x i32>* [[TMP13]], align 8 | |
441 // CHECK-A32-SOFTFP-NEXT: [[TMP15:%.*]] = bitcast <4 x bfloat>* [[COERCE4_I]] to <2 x i32>* | |
442 // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP14]], <2 x i32>* [[TMP15]], align 8 | |
443 // CHECK-A32-SOFTFP-NEXT: [[TMP16:%.*]] = load <4 x bfloat>, <4 x bfloat>* [[COERCE4_I]], align 8 | |
444 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[TMP9]], <4 x bfloat>* [[COERCE5_I]], align 8 | |
445 // CHECK-A32-SOFTFP-NEXT: [[TMP17:%.*]] = bitcast <4 x bfloat>* [[COERCE5_I]] to <2 x i32>* | |
446 // CHECK-A32-SOFTFP-NEXT: [[TMP18:%.*]] = load <2 x i32>, <2 x i32>* [[TMP17]], align 8 | |
447 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[TMP16]], <4 x bfloat>* [[COERCE6_I]], align 8 | |
448 // CHECK-A32-SOFTFP-NEXT: [[TMP19:%.*]] = bitcast <4 x bfloat>* [[COERCE6_I]] to <2 x i32>* | |
449 // CHECK-A32-SOFTFP-NEXT: [[TMP20:%.*]] = load <2 x i32>, <2 x i32>* [[TMP19]], align 8 | |
450 // CHECK-A32-SOFTFP-NEXT: [[TMP21:%.*]] = bitcast <4 x bfloat>* [[__P0_I12]] to <2 x i32>* | |
451 // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP18]], <2 x i32>* [[TMP21]], align 8 | |
452 // CHECK-A32-SOFTFP-NEXT: [[__P01_I16:%.*]] = load <4 x bfloat>, <4 x bfloat>* [[__P0_I12]], align 8 | |
453 // CHECK-A32-SOFTFP-NEXT: [[TMP22:%.*]] = bitcast <4 x bfloat>* [[__P1_I]] to <2 x i32>* | |
454 // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP20]], <2 x i32>* [[TMP22]], align 8 | |
455 // CHECK-A32-SOFTFP-NEXT: [[__P12_I:%.*]] = load <4 x bfloat>, <4 x bfloat>* [[__P1_I]], align 8 | |
456 // CHECK-A32-SOFTFP-NEXT: [[SHUFFLE_I17:%.*]] = shufflevector <4 x bfloat> [[__P01_I16]], <4 x bfloat> [[__P12_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | |
457 // CHECK-A32-SOFTFP-NEXT: store <8 x bfloat> [[SHUFFLE_I17]], <8 x bfloat>* [[RETVAL_I11]], align 8 | |
458 // CHECK-A32-SOFTFP-NEXT: [[TMP23:%.*]] = bitcast <8 x bfloat>* [[RETVAL_I11]] to <4 x i32>* | |
459 // CHECK-A32-SOFTFP-NEXT: [[TMP24:%.*]] = load <4 x i32>, <4 x i32>* [[TMP23]], align 8 | |
460 // CHECK-A32-SOFTFP-NEXT: [[TMP25:%.*]] = bitcast <8 x bfloat>* [[COERCE8_I]] to <4 x i32>* | |
461 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[TMP24]], <4 x i32>* [[TMP25]], align 8 | |
462 // CHECK-A32-SOFTFP-NEXT: [[TMP26:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[COERCE8_I]], align 8 | |
463 // CHECK-A32-SOFTFP-NEXT: store <8 x bfloat> [[TMP26]], <8 x bfloat>* [[RETVAL_I]], align 8 | |
464 // CHECK-A32-SOFTFP-NEXT: [[TMP27:%.*]] = bitcast <8 x bfloat>* [[RETVAL_I]] to <4 x i32>* | |
465 // CHECK-A32-SOFTFP-NEXT: [[TMP28:%.*]] = load <4 x i32>, <4 x i32>* [[TMP27]], align 8 | |
466 // CHECK-A32-SOFTFP-NEXT: [[TMP29:%.*]] = bitcast <8 x bfloat>* [[COERCE2]] to <4 x i32>* | |
467 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[TMP28]], <4 x i32>* [[TMP29]], align 8 | |
468 // CHECK-A32-SOFTFP-NEXT: [[TMP30:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[COERCE2]], align 8 | |
469 // CHECK-A32-SOFTFP-NEXT: store <8 x bfloat> [[TMP30]], <8 x bfloat>* [[RETVAL]], align 8 | |
470 // CHECK-A32-SOFTFP-NEXT: [[TMP31:%.*]] = bitcast <8 x bfloat>* [[RETVAL]] to <4 x i32>* | |
471 // CHECK-A32-SOFTFP-NEXT: [[TMP32:%.*]] = load <4 x i32>, <4 x i32>* [[TMP31]], align 8 | |
472 // CHECK-A32-SOFTFP-NEXT: ret <4 x i32> [[TMP32]] | |
473 // | |
71 bfloat16x8_t test_vcvtq_high_bf16_f32(bfloat16x8_t inactive, float32x4_t a) { | 474 bfloat16x8_t test_vcvtq_high_bf16_f32(bfloat16x8_t inactive, float32x4_t a) { |
72 return vcvtq_high_bf16_f32(inactive, a); | 475 return vcvtq_high_bf16_f32(inactive, a); |
73 } | 476 } |
74 | 477 |
75 // CHECK-LABEL: test_vcvth_bf16_f32 | 478 // CHECK-A64-LABEL: @test_vcvth_bf16_f32( |
76 // CHECK-A64: call bfloat @llvm.aarch64.neon.bfcvt(float %a) | 479 // CHECK-A64-NEXT: entry: |
77 // CHECK-A32-HARDFP: call bfloat @llvm.arm.neon.vcvtbfp2bf(float %a) | 480 // CHECK-A64-NEXT: [[VCVTH_BF16_F32_I:%.*]] = call bfloat @llvm.aarch64.neon.bfcvt(float [[A:%.*]]) #[[ATTR3]] |
78 // CHECK-A32-SOFTFP: call bfloat @llvm.arm.neon.vcvtbfp2bf(float %a) | 481 // CHECK-A64-NEXT: ret bfloat [[VCVTH_BF16_F32_I]] |
482 // | |
483 // CHECK-A32-HARDFP-LABEL: @test_vcvth_bf16_f32( | |
484 // CHECK-A32-HARDFP-NEXT: entry: | |
485 // CHECK-A32-HARDFP-NEXT: [[VCVTBFP2BF_I:%.*]] = call bfloat @llvm.arm.neon.vcvtbfp2bf(float [[A:%.*]]) #[[ATTR3]] | |
486 // CHECK-A32-HARDFP-NEXT: ret bfloat [[VCVTBFP2BF_I]] | |
487 // | |
488 // CHECK-A32-SOFTFP-LABEL: @test_vcvth_bf16_f32( | |
489 // CHECK-A32-SOFTFP-NEXT: entry: | |
490 // CHECK-A32-SOFTFP-NEXT: [[VCVTBFP2BF_I:%.*]] = call bfloat @llvm.arm.neon.vcvtbfp2bf(float [[A:%.*]]) #[[ATTR3]] | |
491 // CHECK-A32-SOFTFP-NEXT: ret bfloat [[VCVTBFP2BF_I]] | |
492 // | |
79 bfloat16_t test_vcvth_bf16_f32(float32_t a) { | 493 bfloat16_t test_vcvth_bf16_f32(float32_t a) { |
80 return vcvth_bf16_f32(a); | 494 return vcvth_bf16_f32(a); |
81 } | 495 } |
82 | 496 |
83 // CHECK-LABEL: test_vcvtah_f32_bf16 | 497 // CHECK-LABEL: @test_vcvtah_f32_bf16( |
84 // CHECK: shl i32 %{{.*}}, 16 | 498 // CHECK-NEXT: entry: |
499 // CHECK-NEXT: [[__REINT_I:%.*]] = alloca bfloat, align 2 | |
500 // CHECK-NEXT: [[__REINT1_I:%.*]] = alloca i32, align 4 | |
501 // CHECK-NEXT: store bfloat [[A:%.*]], bfloat* [[__REINT_I]], align 2 | |
502 // CHECK-NEXT: [[TMP0:%.*]] = bitcast bfloat* [[__REINT_I]] to i32* | |
503 // CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 | |
504 // CHECK-NEXT: [[SHL_I:%.*]] = shl i32 [[TMP1]], 16 | |
505 // CHECK-NEXT: store i32 [[SHL_I]], i32* [[__REINT1_I]], align 4 | |
506 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[__REINT1_I]] to float* | |
507 // CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[TMP2]], align 4 | |
508 // CHECK-NEXT: ret float [[TMP3]] | |
509 // | |
85 float32_t test_vcvtah_f32_bf16(bfloat16_t a) { | 510 float32_t test_vcvtah_f32_bf16(bfloat16_t a) { |
86 return vcvtah_f32_bf16(a); | 511 return vcvtah_f32_bf16(a); |
87 } | 512 } |
88 | 513 |