CbC/CbC_llvm: clang/lib/Headers/f16cintrin.h annotate

annotate clang/lib/Headers/f16cintrin.h @ 176:de4ac79aef9d

...

author	Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date	Mon, 25 May 2020 17:13:11 +0900
parents	1d019706d866
children	c4bab56944e8

rev	line source
150 1d019706d866 LLVM10 anatofuz parents: diff changeset	1 /*===---- f16cintrin.h - F16C intrinsics -----------------------------------===
1d019706d866 LLVM10 anatofuz parents: diff changeset	2 *
1d019706d866 LLVM10 anatofuz parents: diff changeset	3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
1d019706d866 LLVM10 anatofuz parents: diff changeset	4 * See https://llvm.org/LICENSE.txt for license information.
1d019706d866 LLVM10 anatofuz parents: diff changeset	5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
1d019706d866 LLVM10 anatofuz parents: diff changeset	6 *
1d019706d866 LLVM10 anatofuz parents: diff changeset	7 *===-----------------------------------------------------------------------===
1d019706d866 LLVM10 anatofuz parents: diff changeset	8 */
1d019706d866 LLVM10 anatofuz parents: diff changeset	9
1d019706d866 LLVM10 anatofuz parents: diff changeset	10 #if !defined __IMMINTRIN_H
1d019706d866 LLVM10 anatofuz parents: diff changeset	11 #error "Never use <f16cintrin.h> directly; include <immintrin.h> instead."
1d019706d866 LLVM10 anatofuz parents: diff changeset	12 #endif
1d019706d866 LLVM10 anatofuz parents: diff changeset	13
1d019706d866 LLVM10 anatofuz parents: diff changeset	14 #ifndef __F16CINTRIN_H
1d019706d866 LLVM10 anatofuz parents: diff changeset	15 #define __F16CINTRIN_H
1d019706d866 LLVM10 anatofuz parents: diff changeset	16
/* Attribute sets applied to every function defined in this header. Both mark
 * the function always-inline and nodebug and enable the "f16c" target
 * feature; they differ only in the minimum vector width (128 or 256 bits)
 * declared for the function. */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
                 __min_vector_width__(256)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	22
1d019706d866 LLVM10 anatofuz parents: diff changeset	23 /* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
1d019706d866 LLVM10 anatofuz parents: diff changeset	24 * but that's because icc can emulate these without f16c using a library call.
1d019706d866 LLVM10 anatofuz parents: diff changeset	25 * Since we don't do that let's leave these in f16cintrin.h.
1d019706d866 LLVM10 anatofuz parents: diff changeset	26 */
1d019706d866 LLVM10 anatofuz parents: diff changeset	27
1d019706d866 LLVM10 anatofuz parents: diff changeset	28 /// Converts a 16-bit half-precision float value into a 32-bit float
1d019706d866 LLVM10 anatofuz parents: diff changeset	29 /// value.
1d019706d866 LLVM10 anatofuz parents: diff changeset	30 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	31 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	32 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	33 /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	34 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	35 /// \param __a
1d019706d866 LLVM10 anatofuz parents: diff changeset	36 /// A 16-bit half-precision float value.
1d019706d866 LLVM10 anatofuz parents: diff changeset	37 /// \returns The converted 32-bit float value.
1d019706d866 LLVM10 anatofuz parents: diff changeset	38 static __inline float __DEFAULT_FN_ATTRS128
1d019706d866 LLVM10 anatofuz parents: diff changeset	39 _cvtsh_ss(unsigned short __a)
1d019706d866 LLVM10 anatofuz parents: diff changeset	40 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	41 __v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
1d019706d866 LLVM10 anatofuz parents: diff changeset	42 __v4sf __r = __builtin_ia32_vcvtph2ps(__v);
1d019706d866 LLVM10 anatofuz parents: diff changeset	43 return __r[0];
1d019706d866 LLVM10 anatofuz parents: diff changeset	44 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	45
/// Converts a 32-bit single-precision float value to a 16-bit
///    half-precision float value.
///
/// \headerfile <immintrin.h>
///
/// \code
/// unsigned short _cvtss_sh(float a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 32-bit single-precision float value to be converted to a 16-bit
///    half-precision float value.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns The converted 16-bit half-precision float value.
/* The argument and the whole expansion are parenthesized so that the macro
 * behaves as a single primary expression wherever it is used (e.g. under
 * sizeof or next to postfix operators). The input is placed in element 0 of
 * a vector whose other elements are zero, and element 0 of the converted
 * vector is returned. */
#define _cvtss_sh(a, imm)                                                      \
  ((unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph(                         \
      (__v4sf){(a), 0, 0, 0}, (imm)))[0]))
1d019706d866 LLVM10 anatofuz parents: diff changeset	71
/// Converts a 128-bit vector containing 32-bit float values into a
///    128-bit vector containing 16-bit half-precision float values.
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m128i _mm_cvtps_ph(__m128 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 128-bit vector containing 32-bit float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing converted 16-bit half-precision float
///    values. The lower 64 bits are used to store the converted 16-bit
///    half-precision floating-point values.
/* The whole expansion is parenthesized: a bare cast expression would let a
 * following postfix operator (such as a subscript) bind to the builtin call
 * before the cast to __m128i is applied. */
#define _mm_cvtps_ph(a, imm)                                                   \
  ((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	97
1d019706d866 LLVM10 anatofuz parents: diff changeset	98 /// Converts a 128-bit vector containing 16-bit half-precision float
1d019706d866 LLVM10 anatofuz parents: diff changeset	99 /// values into a 128-bit vector containing 32-bit float values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	100 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	101 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	102 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	103 /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	104 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	105 /// \param __a
1d019706d866 LLVM10 anatofuz parents: diff changeset	106 /// A 128-bit vector containing 16-bit half-precision float values. The lower
1d019706d866 LLVM10 anatofuz parents: diff changeset	107 /// 64 bits are used in the conversion.
1d019706d866 LLVM10 anatofuz parents: diff changeset	108 /// \returns A 128-bit vector of [4 x float] containing converted float values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	109 static __inline __m128 __DEFAULT_FN_ATTRS128
1d019706d866 LLVM10 anatofuz parents: diff changeset	110 _mm_cvtph_ps(__m128i __a)
1d019706d866 LLVM10 anatofuz parents: diff changeset	111 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	112 return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
1d019706d866 LLVM10 anatofuz parents: diff changeset	113 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	114
/// Converts a 256-bit vector of [8 x float] into a 128-bit vector
///    containing 16-bit half-precision float values.
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 256-bit vector containing 32-bit single-precision float values to be
///    converted to 16-bit half-precision float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing the converted 16-bit half-precision
///    float values.
/* The whole expansion is parenthesized: a bare cast expression would let a
 * following postfix operator (such as a subscript) bind to the builtin call
 * before the cast to __m128i is applied. */
#define _mm256_cvtps_ph(a, imm)                                                \
  ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	140
1d019706d866 LLVM10 anatofuz parents: diff changeset	141 /// Converts a 128-bit vector containing 16-bit half-precision float
1d019706d866 LLVM10 anatofuz parents: diff changeset	142 /// values into a 256-bit vector of [8 x float].
1d019706d866 LLVM10 anatofuz parents: diff changeset	143 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	144 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	145 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	146 /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	147 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	148 /// \param __a
1d019706d866 LLVM10 anatofuz parents: diff changeset	149 /// A 128-bit vector containing 16-bit half-precision float values to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	150 /// converted to 32-bit single-precision float values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	151 /// \returns A vector of [8 x float] containing the converted 32-bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	152 /// single-precision float values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	153 static __inline __m256 __DEFAULT_FN_ATTRS256
1d019706d866 LLVM10 anatofuz parents: diff changeset	154 _mm256_cvtph_ps(__m128i __a)
1d019706d866 LLVM10 anatofuz parents: diff changeset	155 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	156 return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
1d019706d866 LLVM10 anatofuz parents: diff changeset	157 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	158
1d019706d866 LLVM10 anatofuz parents: diff changeset	159 #undef __DEFAULT_FN_ATTRS128
1d019706d866 LLVM10 anatofuz parents: diff changeset	160 #undef __DEFAULT_FN_ATTRS256
1d019706d866 LLVM10 anatofuz parents: diff changeset	161
1d019706d866 LLVM10 anatofuz parents: diff changeset	162 #endif /* __F16CINTRIN_H */

Mercurial > hg > CbC > CbC_llvm

annotate clang/lib/Headers/f16cintrin.h @ 176:de4ac79aef9d