150
|
1 /*===---- f16cintrin.h - F16C intrinsics -----------------------------------===
|
|
2 *
|
|
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 * See https://llvm.org/LICENSE.txt for license information.
|
|
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 *
|
|
7 *===-----------------------------------------------------------------------===
|
|
8 */
|
|
9
|
|
10 #if !defined __IMMINTRIN_H
|
|
11 #error "Never use <f16cintrin.h> directly; include <immintrin.h> instead."
|
|
12 #endif
|
|
13
|
|
14 #ifndef __F16CINTRIN_H
|
|
15 #define __F16CINTRIN_H
|
|
16
|
|
/* Attribute sets applied to the inline functions defined in this file. The
 * two variants are identical except for the minimum vector width (128 or 256
 * bits) they request. */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
                 __min_vector_width__(256)))
|
|
22
|
|
/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
 * but that's because icc can emulate these without f16c using a library call.
 * Since we don't do that, we leave these in f16cintrin.h.
 */
|
|
27
|
|
28 /// Converts a 16-bit half-precision float value into a 32-bit float
|
|
29 /// value.
|
|
30 ///
|
|
31 /// \headerfile <x86intrin.h>
|
|
32 ///
|
|
33 /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
|
|
34 ///
|
|
35 /// \param __a
|
|
36 /// A 16-bit half-precision float value.
|
|
37 /// \returns The converted 32-bit float value.
|
|
38 static __inline float __DEFAULT_FN_ATTRS128
|
|
39 _cvtsh_ss(unsigned short __a)
|
|
40 {
|
|
41 __v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
|
|
42 __v4sf __r = __builtin_ia32_vcvtph2ps(__v);
|
|
43 return __r[0];
|
|
44 }
|
|
45
|
|
/// Converts a 32-bit single-precision float value to a 16-bit
///    half-precision float value.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned short _cvtss_sh(float a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 32-bit single-precision float value to be converted to a 16-bit
///    half-precision float value. It is placed in element 0 of a
///    [4 x float] vector whose other elements are zero.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns The converted 16-bit half-precision float value (element 0 of the
///    conversion result).
/* The whole expansion and the argument `a` are parenthesized so the macro
 * behaves as a single primary expression at the call site; without the outer
 * parentheses, `sizeof _cvtss_sh(x, 0)` would parse as
 * `sizeof(unsigned short)` followed by a stray parenthesized expression. */
#define _cvtss_sh(a, imm)                                                      \
  ((unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph(                         \
      (__v4sf){(a), 0, 0, 0}, (imm)))[0]))
|
|
71
|
|
/// Converts a 128-bit vector containing 32-bit float values into a
///    128-bit vector containing 16-bit half-precision float values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_cvtps_ph(__m128 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 128-bit vector containing 32-bit float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing converted 16-bit half-precision float
///    values. The lower 64 bits are used to store the converted 16-bit
///    half-precision floating-point values.
/* The expansion is wrapped in parentheses so that the cast to __m128i applies
 * to the complete builtin call no matter what surrounds the macro at the call
 * site (a trailing postfix operator or a leading `sizeof` would otherwise bind
 * to only part of the expression). */
#define _mm_cvtps_ph(a, imm)                                                   \
  ((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)))
|
|
97
|
|
98 /// Converts a 128-bit vector containing 16-bit half-precision float
|
|
99 /// values into a 128-bit vector containing 32-bit float values.
|
|
100 ///
|
|
101 /// \headerfile <x86intrin.h>
|
|
102 ///
|
|
103 /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
|
|
104 ///
|
|
105 /// \param __a
|
|
106 /// A 128-bit vector containing 16-bit half-precision float values. The lower
|
|
107 /// 64 bits are used in the conversion.
|
|
108 /// \returns A 128-bit vector of [4 x float] containing converted float values.
|
|
109 static __inline __m128 __DEFAULT_FN_ATTRS128
|
|
110 _mm_cvtph_ps(__m128i __a)
|
|
111 {
|
|
112 return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
|
|
113 }
|
|
114
|
|
/// Converts a 256-bit vector of [8 x float] into a 128-bit vector
///    containing 16-bit half-precision float values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 256-bit vector containing 32-bit single-precision float values to be
///    converted to 16-bit half-precision float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing the converted 16-bit half-precision
///    float values.
/* The expansion is wrapped in parentheses so that the cast to __m128i applies
 * to the complete builtin call no matter what surrounds the macro at the call
 * site (a trailing postfix operator or a leading `sizeof` would otherwise bind
 * to only part of the expression). */
#define _mm256_cvtps_ph(a, imm)                                                \
  ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)))
|
|
140
|
|
141 /// Converts a 128-bit vector containing 16-bit half-precision float
|
|
142 /// values into a 256-bit vector of [8 x float].
|
|
143 ///
|
|
144 /// \headerfile <x86intrin.h>
|
|
145 ///
|
|
146 /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
|
|
147 ///
|
|
148 /// \param __a
|
|
149 /// A 128-bit vector containing 16-bit half-precision float values to be
|
|
150 /// converted to 32-bit single-precision float values.
|
|
151 /// \returns A vector of [8 x float] containing the converted 32-bit
|
|
152 /// single-precision float values.
|
|
153 static __inline __m256 __DEFAULT_FN_ATTRS256
|
|
154 _mm256_cvtph_ps(__m128i __a)
|
|
155 {
|
|
156 return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
|
|
157 }
|
|
158
|
|
/* The attribute helper macros are private to this header; remove them so they
 * are not visible after it has been processed. */
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128
|
|
161
|
|
162 #endif /* __F16CINTRIN_H */
|