CbC/CbC_llvm: clang/lib/Headers/smmintrin.h annotate

annotate clang/lib/Headers/smmintrin.h @ 176:de4ac79aef9d

...

author	Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date	Mon, 25 May 2020 17:13:11 +0900
parents	1d019706d866
children	c4bab56944e8

rev	line source
150 1d019706d866 LLVM10 anatofuz parents: diff changeset	1 /*===---- smmintrin.h - SSE4 intrinsics ------------------------------------===
1d019706d866 LLVM10 anatofuz parents: diff changeset	2 *
1d019706d866 LLVM10 anatofuz parents: diff changeset	3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
1d019706d866 LLVM10 anatofuz parents: diff changeset	4 * See https://llvm.org/LICENSE.txt for license information.
1d019706d866 LLVM10 anatofuz parents: diff changeset	5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
1d019706d866 LLVM10 anatofuz parents: diff changeset	6 *
1d019706d866 LLVM10 anatofuz parents: diff changeset	7 *===-----------------------------------------------------------------------===
1d019706d866 LLVM10 anatofuz parents: diff changeset	8 */
1d019706d866 LLVM10 anatofuz parents: diff changeset	9
1d019706d866 LLVM10 anatofuz parents: diff changeset	10 #ifndef __SMMINTRIN_H
1d019706d866 LLVM10 anatofuz parents: diff changeset	11 #define __SMMINTRIN_H
1d019706d866 LLVM10 anatofuz parents: diff changeset	12
1d019706d866 LLVM10 anatofuz parents: diff changeset	13 #include <tmmintrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	14
1d019706d866 LLVM10 anatofuz parents: diff changeset	15 /* Define the default attributes for the functions in this file. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	16 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"), __min_vector_width__(128)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	17
1d019706d866 LLVM10 anatofuz parents: diff changeset	18 /* SSE4 Rounding macros. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	19 #define _MM_FROUND_TO_NEAREST_INT 0x00
1d019706d866 LLVM10 anatofuz parents: diff changeset	20 #define _MM_FROUND_TO_NEG_INF 0x01
1d019706d866 LLVM10 anatofuz parents: diff changeset	21 #define _MM_FROUND_TO_POS_INF 0x02
1d019706d866 LLVM10 anatofuz parents: diff changeset	22 #define _MM_FROUND_TO_ZERO 0x03
1d019706d866 LLVM10 anatofuz parents: diff changeset	23 #define _MM_FROUND_CUR_DIRECTION 0x04
1d019706d866 LLVM10 anatofuz parents: diff changeset	24
1d019706d866 LLVM10 anatofuz parents: diff changeset	25 #define _MM_FROUND_RAISE_EXC 0x00
1d019706d866 LLVM10 anatofuz parents: diff changeset	26 #define _MM_FROUND_NO_EXC 0x08
1d019706d866 LLVM10 anatofuz parents: diff changeset	27
1d019706d866 LLVM10 anatofuz parents: diff changeset	28 #define _MM_FROUND_NINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT)
1d019706d866 LLVM10 anatofuz parents: diff changeset	29 #define _MM_FROUND_FLOOR (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF)
1d019706d866 LLVM10 anatofuz parents: diff changeset	30 #define _MM_FROUND_CEIL (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF)
1d019706d866 LLVM10 anatofuz parents: diff changeset	31 #define _MM_FROUND_TRUNC (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO)
1d019706d866 LLVM10 anatofuz parents: diff changeset	32 #define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION)
1d019706d866 LLVM10 anatofuz parents: diff changeset	33 #define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION)
1d019706d866 LLVM10 anatofuz parents: diff changeset	34
1d019706d866 LLVM10 anatofuz parents: diff changeset	35 /// Rounds up each element of the 128-bit vector of [4 x float] to an
1d019706d866 LLVM10 anatofuz parents: diff changeset	36 /// integer and returns the rounded values in a 128-bit vector of
1d019706d866 LLVM10 anatofuz parents: diff changeset	37 /// [4 x float].
1d019706d866 LLVM10 anatofuz parents: diff changeset	38 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	39 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	40 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	41 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	42 /// __m128 _mm_ceil_ps(__m128 X);
1d019706d866 LLVM10 anatofuz parents: diff changeset	43 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	44 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	45 /// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	46 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	47 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	48 /// A 128-bit vector of [4 x float] values to be rounded up.
1d019706d866 LLVM10 anatofuz parents: diff changeset	49 /// \returns A 128-bit vector of [4 x float] containing the rounded values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	50 #define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL)
1d019706d866 LLVM10 anatofuz parents: diff changeset	51
1d019706d866 LLVM10 anatofuz parents: diff changeset	52 /// Rounds up each element of the 128-bit vector of [2 x double] to an
1d019706d866 LLVM10 anatofuz parents: diff changeset	53 /// integer and returns the rounded values in a 128-bit vector of
1d019706d866 LLVM10 anatofuz parents: diff changeset	54 /// [2 x double].
1d019706d866 LLVM10 anatofuz parents: diff changeset	55 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	56 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	57 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	58 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	59 /// __m128d _mm_ceil_pd(__m128d X);
1d019706d866 LLVM10 anatofuz parents: diff changeset	60 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	61 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	62 /// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	63 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	64 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	65 /// A 128-bit vector of [2 x double] values to be rounded up.
1d019706d866 LLVM10 anatofuz parents: diff changeset	66 /// \returns A 128-bit vector of [2 x double] containing the rounded values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	67 #define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL)
1d019706d866 LLVM10 anatofuz parents: diff changeset	68
1d019706d866 LLVM10 anatofuz parents: diff changeset	69 /// Copies three upper elements of the first 128-bit vector operand to
1d019706d866 LLVM10 anatofuz parents: diff changeset	70 /// the corresponding three upper elements of the 128-bit result vector of
1d019706d866 LLVM10 anatofuz parents: diff changeset	71 /// [4 x float]. Rounds up the lowest element of the second 128-bit vector
1d019706d866 LLVM10 anatofuz parents: diff changeset	72 /// operand to an integer and copies it to the lowest element of the 128-bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	73 /// result vector of [4 x float].
1d019706d866 LLVM10 anatofuz parents: diff changeset	74 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	75 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	76 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	77 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	78 /// __m128 _mm_ceil_ss(__m128 X, __m128 Y);
1d019706d866 LLVM10 anatofuz parents: diff changeset	79 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	80 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	81 /// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	82 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	83 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	84 /// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are
1d019706d866 LLVM10 anatofuz parents: diff changeset	85 /// copied to the corresponding bits of the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	86 /// \param Y
1d019706d866 LLVM10 anatofuz parents: diff changeset	87 /// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is
1d019706d866 LLVM10 anatofuz parents: diff changeset	88 /// rounded up to the nearest integer and copied to the corresponding bits
1d019706d866 LLVM10 anatofuz parents: diff changeset	89 /// of the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	90 /// \returns A 128-bit vector of [4 x float] containing the copied and rounded
1d019706d866 LLVM10 anatofuz parents: diff changeset	91 /// values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	92 #define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL)
1d019706d866 LLVM10 anatofuz parents: diff changeset	93
1d019706d866 LLVM10 anatofuz parents: diff changeset	94 /// Copies the upper element of the first 128-bit vector operand to the
1d019706d866 LLVM10 anatofuz parents: diff changeset	95 /// corresponding upper element of the 128-bit result vector of [2 x double].
1d019706d866 LLVM10 anatofuz parents: diff changeset	96 /// Rounds up the lower element of the second 128-bit vector operand to an
1d019706d866 LLVM10 anatofuz parents: diff changeset	97 /// integer and copies it to the lower element of the 128-bit result vector
1d019706d866 LLVM10 anatofuz parents: diff changeset	98 /// of [2 x double].
1d019706d866 LLVM10 anatofuz parents: diff changeset	99 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	100 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	101 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	102 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	103 /// __m128d _mm_ceil_sd(__m128d X, __m128d Y);
1d019706d866 LLVM10 anatofuz parents: diff changeset	104 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	105 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	106 /// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	107 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	108 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	109 /// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is
1d019706d866 LLVM10 anatofuz parents: diff changeset	110 /// copied to the corresponding bits of the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	111 /// \param Y
1d019706d866 LLVM10 anatofuz parents: diff changeset	112 /// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is
1d019706d866 LLVM10 anatofuz parents: diff changeset	113 /// rounded up to the nearest integer and copied to the corresponding bits
1d019706d866 LLVM10 anatofuz parents: diff changeset	114 /// of the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	115 /// \returns A 128-bit vector of [2 x double] containing the copied and rounded
1d019706d866 LLVM10 anatofuz parents: diff changeset	116 /// values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	117 #define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL)
1d019706d866 LLVM10 anatofuz parents: diff changeset	118
1d019706d866 LLVM10 anatofuz parents: diff changeset	119 /// Rounds down each element of the 128-bit vector of [4 x float] to an
1d019706d866 LLVM10 anatofuz parents: diff changeset	120 /// integer and returns the rounded values in a 128-bit vector of
1d019706d866 LLVM10 anatofuz parents: diff changeset	121 /// [4 x float].
1d019706d866 LLVM10 anatofuz parents: diff changeset	122 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	123 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	124 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	125 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	126 /// __m128 _mm_floor_ps(__m128 X);
1d019706d866 LLVM10 anatofuz parents: diff changeset	127 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	128 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	129 /// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	130 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	131 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	132 /// A 128-bit vector of [4 x float] values to be rounded down.
1d019706d866 LLVM10 anatofuz parents: diff changeset	133 /// \returns A 128-bit vector of [4 x float] containing the rounded values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	134 #define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR)
1d019706d866 LLVM10 anatofuz parents: diff changeset	135
1d019706d866 LLVM10 anatofuz parents: diff changeset	136 /// Rounds down each element of the 128-bit vector of [2 x double] to an
1d019706d866 LLVM10 anatofuz parents: diff changeset	137 /// integer and returns the rounded values in a 128-bit vector of
1d019706d866 LLVM10 anatofuz parents: diff changeset	138 /// [2 x double].
1d019706d866 LLVM10 anatofuz parents: diff changeset	139 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	140 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	141 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	142 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	143 /// __m128d _mm_floor_pd(__m128d X);
1d019706d866 LLVM10 anatofuz parents: diff changeset	144 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	145 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	146 /// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	147 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	148 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	149 /// A 128-bit vector of [2 x double] values to be rounded down.
1d019706d866 LLVM10 anatofuz parents: diff changeset	150 /// \returns A 128-bit vector of [2 x double] containing the rounded values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	151 #define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR)
1d019706d866 LLVM10 anatofuz parents: diff changeset	152
1d019706d866 LLVM10 anatofuz parents: diff changeset	153 /// Copies three upper elements of the first 128-bit vector operand to
1d019706d866 LLVM10 anatofuz parents: diff changeset	154 /// the corresponding three upper elements of the 128-bit result vector of
1d019706d866 LLVM10 anatofuz parents: diff changeset	155 /// [4 x float]. Rounds down the lowest element of the second 128-bit vector
1d019706d866 LLVM10 anatofuz parents: diff changeset	156 /// operand to an integer and copies it to the lowest element of the 128-bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	157 /// result vector of [4 x float].
1d019706d866 LLVM10 anatofuz parents: diff changeset	158 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	159 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	160 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	161 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	162 /// __m128 _mm_floor_ss(__m128 X, __m128 Y);
1d019706d866 LLVM10 anatofuz parents: diff changeset	163 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	164 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	165 /// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	166 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	167 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	168 /// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are
1d019706d866 LLVM10 anatofuz parents: diff changeset	169 /// copied to the corresponding bits of the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	170 /// \param Y
1d019706d866 LLVM10 anatofuz parents: diff changeset	171 /// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is
1d019706d866 LLVM10 anatofuz parents: diff changeset	172 /// rounded down to the nearest integer and copied to the corresponding bits
1d019706d866 LLVM10 anatofuz parents: diff changeset	173 /// of the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	174 /// \returns A 128-bit vector of [4 x float] containing the copied and rounded
1d019706d866 LLVM10 anatofuz parents: diff changeset	175 /// values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	176 #define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR)
1d019706d866 LLVM10 anatofuz parents: diff changeset	177
1d019706d866 LLVM10 anatofuz parents: diff changeset	178 /// Copies the upper element of the first 128-bit vector operand to the
1d019706d866 LLVM10 anatofuz parents: diff changeset	179 /// corresponding upper element of the 128-bit result vector of [2 x double].
1d019706d866 LLVM10 anatofuz parents: diff changeset	180 /// Rounds down the lower element of the second 128-bit vector operand to an
1d019706d866 LLVM10 anatofuz parents: diff changeset	181 /// integer and copies it to the lower element of the 128-bit result vector
1d019706d866 LLVM10 anatofuz parents: diff changeset	182 /// of [2 x double].
1d019706d866 LLVM10 anatofuz parents: diff changeset	183 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	184 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	185 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	186 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	187 /// __m128d _mm_floor_sd(__m128d X, __m128d Y);
1d019706d866 LLVM10 anatofuz parents: diff changeset	188 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	189 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	190 /// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	191 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	192 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	193 /// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is
1d019706d866 LLVM10 anatofuz parents: diff changeset	194 /// copied to the corresponding bits of the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	195 /// \param Y
1d019706d866 LLVM10 anatofuz parents: diff changeset	196 /// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is
1d019706d866 LLVM10 anatofuz parents: diff changeset	197 /// rounded down to the nearest integer and copied to the corresponding bits
1d019706d866 LLVM10 anatofuz parents: diff changeset	198 /// of the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	199 /// \returns A 128-bit vector of [2 x double] containing the copied and rounded
1d019706d866 LLVM10 anatofuz parents: diff changeset	200 /// values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	201 #define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)
1d019706d866 LLVM10 anatofuz parents: diff changeset	202
1d019706d866 LLVM10 anatofuz parents: diff changeset	203 /// Rounds each element of the 128-bit vector of [4 x float] to an
1d019706d866 LLVM10 anatofuz parents: diff changeset	204 /// integer value according to the rounding control specified by the second
1d019706d866 LLVM10 anatofuz parents: diff changeset	205 /// argument and returns the rounded values in a 128-bit vector of
1d019706d866 LLVM10 anatofuz parents: diff changeset	206 /// [4 x float].
1d019706d866 LLVM10 anatofuz parents: diff changeset	207 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	208 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	209 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	210 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	211 /// __m128 _mm_round_ps(__m128 X, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	212 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	213 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	214 /// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	215 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	216 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	217 /// A 128-bit vector of [4 x float].
1d019706d866 LLVM10 anatofuz parents: diff changeset	218 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	219 /// An integer value that specifies the rounding operation. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	220 /// Bits [7:4] are reserved. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	221 /// Bit [3] is a precision exception value: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	222 /// 0: A normal PE exception is used \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	223 /// 1: The PE field is not updated \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	224 /// Bit [2] is the rounding control source: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	225 /// 0: Use bits [1:0] of \a M \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	226 /// 1: Use the current MXCSR setting \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	227 /// Bits [1:0] contain the rounding control definition: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	228 /// 00: Nearest \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	229 /// 01: Downward (toward negative infinity) \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	230 /// 10: Upward (toward positive infinity) \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	231 /// 11: Truncated
1d019706d866 LLVM10 anatofuz parents: diff changeset	232 /// \returns A 128-bit vector of [4 x float] containing the rounded values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	233 #define _mm_round_ps(X, M) \
1d019706d866 LLVM10 anatofuz parents: diff changeset	234 (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))
1d019706d866 LLVM10 anatofuz parents: diff changeset	235
1d019706d866 LLVM10 anatofuz parents: diff changeset	236 /// Copies three upper elements of the first 128-bit vector operand to
1d019706d866 LLVM10 anatofuz parents: diff changeset	237 /// the corresponding three upper elements of the 128-bit result vector of
1d019706d866 LLVM10 anatofuz parents: diff changeset	238 /// [4 x float]. Rounds the lowest element of the second 128-bit vector
1d019706d866 LLVM10 anatofuz parents: diff changeset	239 /// operand to an integer value according to the rounding control specified
1d019706d866 LLVM10 anatofuz parents: diff changeset	240 /// by the third argument and copies it to the lowest element of the 128-bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	241 /// result vector of [4 x float].
1d019706d866 LLVM10 anatofuz parents: diff changeset	242 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	243 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	244 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	245 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	246 /// __m128 _mm_round_ss(__m128 X, __m128 Y, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	247 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	248 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	249 /// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	250 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	251 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	252 /// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are
1d019706d866 LLVM10 anatofuz parents: diff changeset	253 /// copied to the corresponding bits of the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	254 /// \param Y
1d019706d866 LLVM10 anatofuz parents: diff changeset	255 /// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is
1d019706d866 LLVM10 anatofuz parents: diff changeset	256 /// rounded to an integer value using the specified rounding control and
1d019706d866 LLVM10 anatofuz parents: diff changeset	257 /// copied to the corresponding bits of the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	258 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	259 /// An integer value that specifies the rounding operation. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	260 /// Bits [7:4] are reserved. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	261 /// Bit [3] is a precision exception value: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	262 /// 0: A normal PE exception is used \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	263 /// 1: The PE field is not updated \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	264 /// Bit [2] is the rounding control source: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	265 /// 0: Use bits [1:0] of \a M \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	266 /// 1: Use the current MXCSR setting \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	267 /// Bits [1:0] contain the rounding control definition: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	268 /// 00: Nearest \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	269 /// 01: Downward (toward negative infinity) \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	270 /// 10: Upward (toward positive infinity) \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	271 /// 11: Truncated
1d019706d866 LLVM10 anatofuz parents: diff changeset	272 /// \returns A 128-bit vector of [4 x float] containing the copied and rounded
1d019706d866 LLVM10 anatofuz parents: diff changeset	273 /// values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	274 #define _mm_round_ss(X, Y, M) \
1d019706d866 LLVM10 anatofuz parents: diff changeset	275 (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	276 (__v4sf)(__m128)(Y), (M))
1d019706d866 LLVM10 anatofuz parents: diff changeset	277
1d019706d866 LLVM10 anatofuz parents: diff changeset	278 /// Rounds each element of the 128-bit vector of [2 x double] to an
1d019706d866 LLVM10 anatofuz parents: diff changeset	279 /// integer value according to the rounding control specified by the second
1d019706d866 LLVM10 anatofuz parents: diff changeset	280 /// argument and returns the rounded values in a 128-bit vector of
1d019706d866 LLVM10 anatofuz parents: diff changeset	281 /// [2 x double].
1d019706d866 LLVM10 anatofuz parents: diff changeset	282 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	283 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	284 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	285 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	286 /// __m128d _mm_round_pd(__m128d X, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	287 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	288 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	289 /// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	290 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	291 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	292 /// A 128-bit vector of [2 x double].
1d019706d866 LLVM10 anatofuz parents: diff changeset	293 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	294 /// An integer value that specifies the rounding operation. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	295 /// Bits [7:4] are reserved. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	296 /// Bit [3] is a precision exception value: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	297 /// 0: A normal PE exception is used \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	298 /// 1: The PE field is not updated \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	299 /// Bit [2] is the rounding control source: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	300 /// 0: Use bits [1:0] of \a M \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	301 /// 1: Use the current MXCSR setting \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	302 /// Bits [1:0] contain the rounding control definition: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	303 /// 00: Nearest \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	304 /// 01: Downward (toward negative infinity) \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	305 /// 10: Upward (toward positive infinity) \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	306 /// 11: Truncated
1d019706d866 LLVM10 anatofuz parents: diff changeset	307 /// \returns A 128-bit vector of [2 x double] containing the rounded values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	308 #define _mm_round_pd(X, M) \
1d019706d866 LLVM10 anatofuz parents: diff changeset	309 (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M))
1d019706d866 LLVM10 anatofuz parents: diff changeset	310
1d019706d866 LLVM10 anatofuz parents: diff changeset	311 /// Copies the upper element of the first 128-bit vector operand to the
1d019706d866 LLVM10 anatofuz parents: diff changeset	312 /// corresponding upper element of the 128-bit result vector of [2 x double].
1d019706d866 LLVM10 anatofuz parents: diff changeset	313 /// Rounds the lower element of the second 128-bit vector operand to an
1d019706d866 LLVM10 anatofuz parents: diff changeset	314 /// integer value according to the rounding control specified by the third
1d019706d866 LLVM10 anatofuz parents: diff changeset	315 /// argument and copies it to the lower element of the 128-bit result vector
1d019706d866 LLVM10 anatofuz parents: diff changeset	316 /// of [2 x double].
1d019706d866 LLVM10 anatofuz parents: diff changeset	317 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	318 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	319 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	320 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	321 /// __m128d _mm_round_sd(__m128d X, __m128d Y, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	322 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	323 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	324 /// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	325 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	326 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	327 /// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is
1d019706d866 LLVM10 anatofuz parents: diff changeset	328 /// copied to the corresponding bits of the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	329 /// \param Y
1d019706d866 LLVM10 anatofuz parents: diff changeset	330 /// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is
1d019706d866 LLVM10 anatofuz parents: diff changeset	331 /// rounded to an integer value using the specified rounding control and
1d019706d866 LLVM10 anatofuz parents: diff changeset	332 /// copied to the corresponding bits of the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	333 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	334 /// An integer value that specifies the rounding operation. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	335 /// Bits [7:4] are reserved. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	336 /// Bit [3] is a precision exception value: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	337 /// 0: A normal PE exception is used \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	338 /// 1: The PE field is not updated \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	339 /// Bit [2] is the rounding control source: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	340 /// 0: Use bits [1:0] of \a M \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	341 /// 1: Use the current MXCSR setting \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	342 /// Bits [1:0] contain the rounding control definition: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	343 /// 00: Nearest \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	344 /// 01: Downward (toward negative infinity) \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	345 /// 10: Upward (toward positive infinity) \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	346 /// 11: Truncated
1d019706d866 LLVM10 anatofuz parents: diff changeset	347 /// \returns A 128-bit vector of [2 x double] containing the copied and rounded
1d019706d866 LLVM10 anatofuz parents: diff changeset	348 /// values.
/* The expansion is parenthesized as a whole so that it behaves as a single
   expression whatever operators surround its use (e.g. a trailing
   subscript applies to the converted result, not to the builtin call). */
#define _mm_round_sd(X, Y, M) \
  ((__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \
                                   (__v2df)(__m128d)(Y), (M)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	352
1d019706d866 LLVM10 anatofuz parents: diff changeset	353 /* SSE4 Packed Blending Intrinsics. */
/// Returns a 128-bit vector of [2 x double] where the values are
///    selected from either the first or second operand as specified by the
///    third operand, the control mask.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_blend_pd(__m128d V1, __m128d V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.
///
/// \param V1
///    A 128-bit vector of [2 x double].
/// \param V2
///    A 128-bit vector of [2 x double].
/// \param M
///    An immediate integer operand, with mask bits [1:0] specifying how the
///    values are to be copied. The position of the mask bit corresponds to the
///    index of a copied value. When a mask bit is 0, the corresponding 64-bit
///    element in operand \a V1 is copied to the same position in the result.
///    When a mask bit is 1, the corresponding 64-bit element in operand \a V2
///    is copied to the same position in the result.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
#define _mm_blend_pd(V1, V2, M) \
  ((__m128d)__builtin_ia32_blendpd((__v2df)(__m128d)(V1), \
                                   (__v2df)(__m128d)(V2), (int)(M)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	381
/// Returns a 128-bit vector of [4 x float] where the values are selected
///    from either the first or second operand as specified by the third
///    operand, the control mask.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_blend_ps(__m128 V1, __m128 V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VBLENDPS / BLENDPS </c> instruction.
///
/// \param V1
///    A 128-bit vector of [4 x float].
/// \param V2
///    A 128-bit vector of [4 x float].
/// \param M
///    An immediate integer operand, with mask bits [3:0] specifying how the
///    values are to be copied. The position of the mask bit corresponds to the
///    index of a copied value. When a mask bit is 0, the corresponding 32-bit
///    element in operand \a V1 is copied to the same position in the result.
///    When a mask bit is 1, the corresponding 32-bit element in operand \a V2
///    is copied to the same position in the result.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
#define _mm_blend_ps(V1, V2, M) \
  ((__m128)__builtin_ia32_blendps((__v4sf)(__m128)(V1), \
                                  (__v4sf)(__m128)(V2), (int)(M)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	409
1d019706d866 LLVM10 anatofuz parents: diff changeset	410 /// Returns a 128-bit vector of [2 x double] where the values are
1d019706d866 LLVM10 anatofuz parents: diff changeset	411 /// selected from either the first or second operand as specified by the
1d019706d866 LLVM10 anatofuz parents: diff changeset	412 /// third operand, the control mask.
1d019706d866 LLVM10 anatofuz parents: diff changeset	413 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	414 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	415 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	416 /// This intrinsic corresponds to the <c> VBLENDVPD / BLENDVPD </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	417 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	418 /// \param __V1
1d019706d866 LLVM10 anatofuz parents: diff changeset	419 /// A 128-bit vector of [2 x double].
1d019706d866 LLVM10 anatofuz parents: diff changeset	420 /// \param __V2
1d019706d866 LLVM10 anatofuz parents: diff changeset	421 /// A 128-bit vector of [2 x double].
1d019706d866 LLVM10 anatofuz parents: diff changeset	422 /// \param __M
1d019706d866 LLVM10 anatofuz parents: diff changeset	423 /// A 128-bit vector operand, with mask bits 127 and 63 specifying how the
1d019706d866 LLVM10 anatofuz parents: diff changeset	424 /// values are to be copied. The position of the mask bit corresponds to the
1d019706d866 LLVM10 anatofuz parents: diff changeset	425 /// most significant bit of a copied value. When a mask bit is 0, the
1d019706d866 LLVM10 anatofuz parents: diff changeset	426 /// corresponding 64-bit element in operand \a __V1 is copied to the same
1d019706d866 LLVM10 anatofuz parents: diff changeset	427 /// position in the result. When a mask bit is 1, the corresponding 64-bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	428 /// element in operand \a __V2 is copied to the same position in the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	429 /// \returns A 128-bit vector of [2 x double] containing the copied values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	430 static __inline__ __m128d __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	431 _mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M)
1d019706d866 LLVM10 anatofuz parents: diff changeset	432 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	433 return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2,
1d019706d866 LLVM10 anatofuz parents: diff changeset	434 (__v2df)__M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	435 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	436
1d019706d866 LLVM10 anatofuz parents: diff changeset	437 /// Returns a 128-bit vector of [4 x float] where the values are
1d019706d866 LLVM10 anatofuz parents: diff changeset	438 /// selected from either the first or second operand as specified by the
1d019706d866 LLVM10 anatofuz parents: diff changeset	439 /// third operand, the control mask.
1d019706d866 LLVM10 anatofuz parents: diff changeset	440 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	441 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	442 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	443 /// This intrinsic corresponds to the <c> VBLENDVPS / BLENDVPS </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	444 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	445 /// \param __V1
1d019706d866 LLVM10 anatofuz parents: diff changeset	446 /// A 128-bit vector of [4 x float].
1d019706d866 LLVM10 anatofuz parents: diff changeset	447 /// \param __V2
1d019706d866 LLVM10 anatofuz parents: diff changeset	448 /// A 128-bit vector of [4 x float].
1d019706d866 LLVM10 anatofuz parents: diff changeset	449 /// \param __M
1d019706d866 LLVM10 anatofuz parents: diff changeset	450 /// A 128-bit vector operand, with mask bits 127, 95, 63, and 31 specifying
1d019706d866 LLVM10 anatofuz parents: diff changeset	451 /// how the values are to be copied. The position of the mask bit corresponds
1d019706d866 LLVM10 anatofuz parents: diff changeset	452 /// to the most significant bit of a copied value. When a mask bit is 0, the
1d019706d866 LLVM10 anatofuz parents: diff changeset	453 /// corresponding 32-bit element in operand \a __V1 is copied to the same
1d019706d866 LLVM10 anatofuz parents: diff changeset	454 /// position in the result. When a mask bit is 1, the corresponding 32-bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	455 /// element in operand \a __V2 is copied to the same position in the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	456 /// \returns A 128-bit vector of [4 x float] containing the copied values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	457 static __inline__ __m128 __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	458 _mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M)
1d019706d866 LLVM10 anatofuz parents: diff changeset	459 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	460 return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2,
1d019706d866 LLVM10 anatofuz parents: diff changeset	461 (__v4sf)__M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	462 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	463
1d019706d866 LLVM10 anatofuz parents: diff changeset	464 /// Returns a 128-bit vector of [16 x i8] where the values are selected
1d019706d866 LLVM10 anatofuz parents: diff changeset	465 /// from either of the first or second operand as specified by the third
1d019706d866 LLVM10 anatofuz parents: diff changeset	466 /// operand, the control mask.
1d019706d866 LLVM10 anatofuz parents: diff changeset	467 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	468 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	469 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	470 /// This intrinsic corresponds to the <c> VPBLENDVB / PBLENDVB </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	471 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	472 /// \param __V1
1d019706d866 LLVM10 anatofuz parents: diff changeset	473 /// A 128-bit vector of [16 x i8].
1d019706d866 LLVM10 anatofuz parents: diff changeset	474 /// \param __V2
1d019706d866 LLVM10 anatofuz parents: diff changeset	475 /// A 128-bit vector of [16 x i8].
1d019706d866 LLVM10 anatofuz parents: diff changeset	476 /// \param __M
1d019706d866 LLVM10 anatofuz parents: diff changeset	477 /// A 128-bit vector operand, with mask bits 127, 119, 111...7 specifying
1d019706d866 LLVM10 anatofuz parents: diff changeset	478 /// how the values are to be copied. The position of the mask bit corresponds
1d019706d866 LLVM10 anatofuz parents: diff changeset	479 /// to the most significant bit of a copied value. When a mask bit is 0, the
1d019706d866 LLVM10 anatofuz parents: diff changeset	480 /// corresponding 8-bit element in operand \a __V1 is copied to the same
1d019706d866 LLVM10 anatofuz parents: diff changeset	481 /// position in the result. When a mask bit is 1, the corresponding 8-bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	482 /// element in operand \a __V2 is copied to the same position in the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	483 /// \returns A 128-bit vector of [16 x i8] containing the copied values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	484 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	485 _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
1d019706d866 LLVM10 anatofuz parents: diff changeset	486 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	487 return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2,
1d019706d866 LLVM10 anatofuz parents: diff changeset	488 (__v16qi)__M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	489 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	490
/// Returns a 128-bit vector of [8 x i16] where the values are selected
///    from either of the first or second operand as specified by the third
///    operand, the control mask.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_blend_epi16(__m128i V1, __m128i V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPBLENDW / PBLENDW </c> instruction.
///
/// \param V1
///    A 128-bit vector of [8 x i16].
/// \param V2
///    A 128-bit vector of [8 x i16].
/// \param M
///    An immediate integer operand, with mask bits [7:0] specifying how the
///    values are to be copied. The position of the mask bit corresponds to the
///    index of a copied value. When a mask bit is 0, the corresponding 16-bit
///    element in operand \a V1 is copied to the same position in the result.
///    When a mask bit is 1, the corresponding 16-bit element in operand \a V2
///    is copied to the same position in the result.
/// \returns A 128-bit vector of [8 x i16] containing the copied values.
#define _mm_blend_epi16(V1, V2, M) \
  ((__m128i)__builtin_ia32_pblendw128((__v8hi)(__m128i)(V1), \
                                      (__v8hi)(__m128i)(V2), (int)(M)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	518
1d019706d866 LLVM10 anatofuz parents: diff changeset	519 /* SSE4 Dword Multiply Instructions. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	520 /// Multiples corresponding elements of two 128-bit vectors of [4 x i32]
1d019706d866 LLVM10 anatofuz parents: diff changeset	521 /// and returns the lower 32 bits of the each product in a 128-bit vector of
1d019706d866 LLVM10 anatofuz parents: diff changeset	522 /// [4 x i32].
1d019706d866 LLVM10 anatofuz parents: diff changeset	523 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	524 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	525 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	526 /// This intrinsic corresponds to the <c> VPMULLD / PMULLD </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	527 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	528 /// \param __V1
1d019706d866 LLVM10 anatofuz parents: diff changeset	529 /// A 128-bit integer vector.
1d019706d866 LLVM10 anatofuz parents: diff changeset	530 /// \param __V2
1d019706d866 LLVM10 anatofuz parents: diff changeset	531 /// A 128-bit integer vector.
1d019706d866 LLVM10 anatofuz parents: diff changeset	532 /// \returns A 128-bit integer vector containing the products of both operands.
1d019706d866 LLVM10 anatofuz parents: diff changeset	533 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	534 _mm_mullo_epi32 (__m128i __V1, __m128i __V2)
1d019706d866 LLVM10 anatofuz parents: diff changeset	535 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	536 return (__m128i) ((__v4su)__V1 * (__v4su)__V2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	537 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	538
1d019706d866 LLVM10 anatofuz parents: diff changeset	539 /// Multiplies corresponding even-indexed elements of two 128-bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	540 /// vectors of [4 x i32] and returns a 128-bit vector of [2 x i64]
1d019706d866 LLVM10 anatofuz parents: diff changeset	541 /// containing the products.
1d019706d866 LLVM10 anatofuz parents: diff changeset	542 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	543 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	544 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	545 /// This intrinsic corresponds to the <c> VPMULDQ / PMULDQ </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	546 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	547 /// \param __V1
1d019706d866 LLVM10 anatofuz parents: diff changeset	548 /// A 128-bit vector of [4 x i32].
1d019706d866 LLVM10 anatofuz parents: diff changeset	549 /// \param __V2
1d019706d866 LLVM10 anatofuz parents: diff changeset	550 /// A 128-bit vector of [4 x i32].
1d019706d866 LLVM10 anatofuz parents: diff changeset	551 /// \returns A 128-bit vector of [2 x i64] containing the products of both
1d019706d866 LLVM10 anatofuz parents: diff changeset	552 /// operands.
1d019706d866 LLVM10 anatofuz parents: diff changeset	553 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	554 _mm_mul_epi32 (__m128i __V1, __m128i __V2)
1d019706d866 LLVM10 anatofuz parents: diff changeset	555 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	556 return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	557 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	558
1d019706d866 LLVM10 anatofuz parents: diff changeset	559 /* SSE4 Floating Point Dot Product Instructions. */
/// Computes the dot product of the two 128-bit vectors of [4 x float]
///    and returns it in the elements of the 128-bit result vector of
///    [4 x float].
///
///    The immediate integer operand controls which input elements
///    will contribute to the dot product, and where the final results are
///    returned.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_dp_ps(__m128 X, __m128 Y, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VDPPS / DPPS </c> instruction.
///
/// \param X
///    A 128-bit vector of [4 x float].
/// \param Y
///    A 128-bit vector of [4 x float].
/// \param M
///    An immediate integer operand. Mask bits [7:4] determine which elements
///    of the input vectors are used, with bit [4] corresponding to the lowest
///    element and bit [7] corresponding to the highest element of each [4 x
///    float] vector. If a bit is set, the corresponding elements from the two
///    input vectors are used as an input for dot product; otherwise that input
///    is treated as zero. Bits [3:0] determine which elements of the result
///    will receive a copy of the final dot product, with bit [0] corresponding
///    to the lowest element and bit [3] corresponding to the highest element of
///    the [4 x float] result. If a bit is set, the dot product is returned
///    in the corresponding element; otherwise that element is set to zero.
/// \returns A 128-bit vector of [4 x float] containing the dot product.
#define _mm_dp_ps(X, Y, M) \
  ((__m128)__builtin_ia32_dpps((__v4sf)(__m128)(X), \
                               (__v4sf)(__m128)(Y), (M)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	595
/// Computes the dot product of the two 128-bit vectors of [2 x double]
///    and returns it in the elements of the 128-bit result vector of
///    [2 x double].
///
///    The immediate integer operand controls which input
///    elements will contribute to the dot product, and where the final results
///    are returned.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_dp_pd(__m128d X, __m128d Y, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VDPPD / DPPD </c> instruction.
///
/// \param X
///    A 128-bit vector of [2 x double].
/// \param Y
///    A 128-bit vector of [2 x double].
/// \param M
///    An immediate integer operand. Mask bits [5:4] determine which elements
///    of the input vectors are used, with bit [4] corresponding to the lowest
///    element and bit [5] corresponding to the highest element of each [2 x
///    double] vector. If a bit is set, the corresponding elements from the two
///    input vectors are used as an input for dot product; otherwise that input
///    is treated as zero. Bits [1:0] determine which elements of the result
///    will receive a copy of the final dot product, with bit [0] corresponding
///    to the lowest element and bit [1] corresponding to the highest element of
///    the [2 x double] result. If a bit is set, the dot product is returned in
///    the corresponding element; otherwise that element is set to zero.
/// \returns A 128-bit vector of [2 x double] containing the dot product.
#define _mm_dp_pd(X, Y, M) \
  ((__m128d)__builtin_ia32_dppd((__v2df)(__m128d)(X), \
                                (__v2df)(__m128d)(Y), (M)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	630
1d019706d866 LLVM10 anatofuz parents: diff changeset	631 /* SSE4 Streaming Load Hint Instruction. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	632 /// Loads integer values from a 128-bit aligned memory location to a
1d019706d866 LLVM10 anatofuz parents: diff changeset	633 /// 128-bit integer vector.
1d019706d866 LLVM10 anatofuz parents: diff changeset	634 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	635 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	636 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	637 /// This intrinsic corresponds to the <c> VMOVNTDQA / MOVNTDQA </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	638 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	639 /// \param __V
1d019706d866 LLVM10 anatofuz parents: diff changeset	640 /// A pointer to a 128-bit aligned memory location that contains the integer
1d019706d866 LLVM10 anatofuz parents: diff changeset	641 /// values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	642 /// \returns A 128-bit integer vector containing the data stored at the
1d019706d866 LLVM10 anatofuz parents: diff changeset	643 /// specified memory location.
1d019706d866 LLVM10 anatofuz parents: diff changeset	644 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	645 _mm_stream_load_si128 (__m128i const *__V)
1d019706d866 LLVM10 anatofuz parents: diff changeset	646 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	647 return (__m128i) __builtin_nontemporal_load ((const __v2di *) __V);
1d019706d866 LLVM10 anatofuz parents: diff changeset	648 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	649
1d019706d866 LLVM10 anatofuz parents: diff changeset	650 /* SSE4 Packed Integer Min/Max Instructions. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	651 /// Compares the corresponding elements of two 128-bit vectors of
1d019706d866 LLVM10 anatofuz parents: diff changeset	652 /// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the lesser
1d019706d866 LLVM10 anatofuz parents: diff changeset	653 /// of the two values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	654 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	655 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	656 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	657 /// This intrinsic corresponds to the <c> VPMINSB / PMINSB </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	658 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	659 /// \param __V1
1d019706d866 LLVM10 anatofuz parents: diff changeset	660 /// A 128-bit vector of [16 x i8].
1d019706d866 LLVM10 anatofuz parents: diff changeset	661 /// \param __V2
1d019706d866 LLVM10 anatofuz parents: diff changeset	662 /// A 128-bit vector of [16 x i8]
1d019706d866 LLVM10 anatofuz parents: diff changeset	663 /// \returns A 128-bit vector of [16 x i8] containing the lesser values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	664 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	665 _mm_min_epi8 (__m128i __V1, __m128i __V2)
1d019706d866 LLVM10 anatofuz parents: diff changeset	666 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	667 return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	668 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	669
1d019706d866 LLVM10 anatofuz parents: diff changeset	670 /// Compares the corresponding signed 8-bit elements of two 128-bit vectors
1d019706d866 LLVM10 anatofuz parents: diff changeset	671 /// of [16 x i8] and returns a 128-bit vector of [16 x i8] containing the
1d019706d866 LLVM10 anatofuz parents: diff changeset	672 /// greater value of each pair.
1d019706d866 LLVM10 anatofuz parents: diff changeset	673 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	674 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	675 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	676 /// This intrinsic corresponds to the <c> VPMAXSB / PMAXSB </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	677 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	678 /// \param __V1
1d019706d866 LLVM10 anatofuz parents: diff changeset	679 /// A 128-bit vector of [16 x i8].
1d019706d866 LLVM10 anatofuz parents: diff changeset	680 /// \param __V2
1d019706d866 LLVM10 anatofuz parents: diff changeset	681 /// A 128-bit vector of [16 x i8].
1d019706d866 LLVM10 anatofuz parents: diff changeset	682 /// \returns A 128-bit vector of [16 x i8] containing the greater values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	683 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	684 _mm_max_epi8 (__m128i __V1, __m128i __V2)
1d019706d866 LLVM10 anatofuz parents: diff changeset	685 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	686 return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	687 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	688
1d019706d866 LLVM10 anatofuz parents: diff changeset	689 /// Compares the corresponding unsigned 16-bit elements of two 128-bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	690 /// vectors of [8 x u16] and returns a 128-bit vector of [8 x u16] containing
1d019706d866 LLVM10 anatofuz parents: diff changeset	691 /// the lesser value of each pair.
1d019706d866 LLVM10 anatofuz parents: diff changeset	692 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	693 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	694 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	695 /// This intrinsic corresponds to the <c> VPMINUW / PMINUW </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	696 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	697 /// \param __V1
1d019706d866 LLVM10 anatofuz parents: diff changeset	698 /// A 128-bit vector of [8 x u16].
1d019706d866 LLVM10 anatofuz parents: diff changeset	699 /// \param __V2
1d019706d866 LLVM10 anatofuz parents: diff changeset	700 /// A 128-bit vector of [8 x u16].
1d019706d866 LLVM10 anatofuz parents: diff changeset	701 /// \returns A 128-bit vector of [8 x u16] containing the lesser values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	702 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	703 _mm_min_epu16 (__m128i __V1, __m128i __V2)
1d019706d866 LLVM10 anatofuz parents: diff changeset	704 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	705 return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	706 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	707
1d019706d866 LLVM10 anatofuz parents: diff changeset	708 /// Compares the corresponding unsigned 16-bit elements of two 128-bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	709 /// vectors of [8 x u16] and returns a 128-bit vector of [8 x u16] containing
1d019706d866 LLVM10 anatofuz parents: diff changeset	710 /// the greater value of each pair.
1d019706d866 LLVM10 anatofuz parents: diff changeset	711 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	712 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	713 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	714 /// This intrinsic corresponds to the <c> VPMAXUW / PMAXUW </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	715 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	716 /// \param __V1
1d019706d866 LLVM10 anatofuz parents: diff changeset	717 /// A 128-bit vector of [8 x u16].
1d019706d866 LLVM10 anatofuz parents: diff changeset	718 /// \param __V2
1d019706d866 LLVM10 anatofuz parents: diff changeset	719 /// A 128-bit vector of [8 x u16].
1d019706d866 LLVM10 anatofuz parents: diff changeset	720 /// \returns A 128-bit vector of [8 x u16] containing the greater values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	721 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	722 _mm_max_epu16 (__m128i __V1, __m128i __V2)
1d019706d866 LLVM10 anatofuz parents: diff changeset	723 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	724 return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	725 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	726
1d019706d866 LLVM10 anatofuz parents: diff changeset	727 /// Compares the corresponding signed 32-bit elements of two 128-bit vectors
1d019706d866 LLVM10 anatofuz parents: diff changeset	728 /// of [4 x i32] and returns a 128-bit vector of [4 x i32] containing the
1d019706d866 LLVM10 anatofuz parents: diff changeset	729 /// lesser value of each pair.
1d019706d866 LLVM10 anatofuz parents: diff changeset	730 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	731 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	732 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	733 /// This intrinsic corresponds to the <c> VPMINSD / PMINSD </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	734 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	735 /// \param __V1
1d019706d866 LLVM10 anatofuz parents: diff changeset	736 /// A 128-bit vector of [4 x i32].
1d019706d866 LLVM10 anatofuz parents: diff changeset	737 /// \param __V2
1d019706d866 LLVM10 anatofuz parents: diff changeset	738 /// A 128-bit vector of [4 x i32].
1d019706d866 LLVM10 anatofuz parents: diff changeset	739 /// \returns A 128-bit vector of [4 x i32] containing the lesser values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	740 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	741 _mm_min_epi32 (__m128i __V1, __m128i __V2)
1d019706d866 LLVM10 anatofuz parents: diff changeset	742 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	743 return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	744 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	745
1d019706d866 LLVM10 anatofuz parents: diff changeset	746 /// Compares the corresponding signed 32-bit elements of two 128-bit vectors
1d019706d866 LLVM10 anatofuz parents: diff changeset	747 /// of [4 x i32] and returns a 128-bit vector of [4 x i32] containing the
1d019706d866 LLVM10 anatofuz parents: diff changeset	748 /// greater value of each pair.
1d019706d866 LLVM10 anatofuz parents: diff changeset	749 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	750 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	751 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	752 /// This intrinsic corresponds to the <c> VPMAXSD / PMAXSD </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	753 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	754 /// \param __V1
1d019706d866 LLVM10 anatofuz parents: diff changeset	755 /// A 128-bit vector of [4 x i32].
1d019706d866 LLVM10 anatofuz parents: diff changeset	756 /// \param __V2
1d019706d866 LLVM10 anatofuz parents: diff changeset	757 /// A 128-bit vector of [4 x i32].
1d019706d866 LLVM10 anatofuz parents: diff changeset	758 /// \returns A 128-bit vector of [4 x i32] containing the greater values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	759 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	760 _mm_max_epi32 (__m128i __V1, __m128i __V2)
1d019706d866 LLVM10 anatofuz parents: diff changeset	761 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	762 return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	763 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	764
1d019706d866 LLVM10 anatofuz parents: diff changeset	765 /// Compares the corresponding unsigned 32-bit elements of two 128-bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	766 /// vectors of [4 x u32] and returns a 128-bit vector of [4 x u32] containing
1d019706d866 LLVM10 anatofuz parents: diff changeset	767 /// the lesser value of each pair.
1d019706d866 LLVM10 anatofuz parents: diff changeset	768 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	769 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	770 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	771 /// This intrinsic corresponds to the <c> VPMINUD / PMINUD </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	772 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	773 /// \param __V1
1d019706d866 LLVM10 anatofuz parents: diff changeset	774 /// A 128-bit vector of [4 x u32].
1d019706d866 LLVM10 anatofuz parents: diff changeset	775 /// \param __V2
1d019706d866 LLVM10 anatofuz parents: diff changeset	776 /// A 128-bit vector of [4 x u32].
1d019706d866 LLVM10 anatofuz parents: diff changeset	777 /// \returns A 128-bit vector of [4 x u32] containing the lesser values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	778 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	779 _mm_min_epu32 (__m128i __V1, __m128i __V2)
1d019706d866 LLVM10 anatofuz parents: diff changeset	780 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	781 return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	782 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	783
1d019706d866 LLVM10 anatofuz parents: diff changeset	784 /// Compares the corresponding unsigned 32-bit elements of two 128-bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	785 /// vectors of [4 x u32] and returns a 128-bit vector of [4 x u32] containing
1d019706d866 LLVM10 anatofuz parents: diff changeset	786 /// the greater value of each pair.
1d019706d866 LLVM10 anatofuz parents: diff changeset	787 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	788 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	789 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	790 /// This intrinsic corresponds to the <c> VPMAXUD / PMAXUD </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	791 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	792 /// \param __V1
1d019706d866 LLVM10 anatofuz parents: diff changeset	793 /// A 128-bit vector of [4 x u32].
1d019706d866 LLVM10 anatofuz parents: diff changeset	794 /// \param __V2
1d019706d866 LLVM10 anatofuz parents: diff changeset	795 /// A 128-bit vector of [4 x u32].
1d019706d866 LLVM10 anatofuz parents: diff changeset	796 /// \returns A 128-bit vector of [4 x u32] containing the greater values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	797 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	798 _mm_max_epu32 (__m128i __V1, __m128i __V2)
1d019706d866 LLVM10 anatofuz parents: diff changeset	799 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	800 return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	801 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	802
1d019706d866 LLVM10 anatofuz parents: diff changeset	803 /* SSE4 Insertion and Extraction from XMM Register Instructions. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	804 /// Takes the first argument \a X and inserts an element from the second
1d019706d866 LLVM10 anatofuz parents: diff changeset	805 /// argument \a Y as selected by the third argument \a N. That result then
1d019706d866 LLVM10 anatofuz parents: diff changeset	806 /// has elements zeroed out also as selected by the third argument \a N. The
1d019706d866 LLVM10 anatofuz parents: diff changeset	807 /// resulting 128-bit vector of [4 x float] is then returned.
1d019706d866 LLVM10 anatofuz parents: diff changeset	808 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	809 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	810 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	811 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	812 /// __m128 _mm_insert_ps(__m128 X, __m128 Y, const int N);
1d019706d866 LLVM10 anatofuz parents: diff changeset	813 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	814 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	815 /// This intrinsic corresponds to the <c> VINSERTPS </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	816 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	817 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	818 /// A 128-bit vector source operand of [4 x float]. With the exception of
1d019706d866 LLVM10 anatofuz parents: diff changeset	819 /// those bits in the result copied from parameter \a Y and zeroed by bits
1d019706d866 LLVM10 anatofuz parents: diff changeset	820 /// [3:0] of \a N, all bits from this parameter are copied to the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	821 /// \param Y
1d019706d866 LLVM10 anatofuz parents: diff changeset	822 /// A 128-bit vector source operand of [4 x float]. One single-precision
1d019706d866 LLVM10 anatofuz parents: diff changeset	823 /// floating-point element from this source, as determined by the immediate
1d019706d866 LLVM10 anatofuz parents: diff changeset	824 /// parameter, is copied to the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	825 /// \param N
1d019706d866 LLVM10 anatofuz parents: diff changeset	826 /// Specifies which bits from operand \a Y will be copied, which bits in the
1d019706d866 LLVM10 anatofuz parents: diff changeset	827 /// result they will be copied to, and which bits in the result will be
1d019706d866 LLVM10 anatofuz parents: diff changeset	828 /// cleared. The following assignments are made: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	829 /// Bits [7:6] specify the bits to copy from operand \a Y: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	830 /// 00: Selects bits [31:0] from operand \a Y. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	831 /// 01: Selects bits [63:32] from operand \a Y. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	832 /// 10: Selects bits [95:64] from operand \a Y. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	833 /// 11: Selects bits [127:96] from operand \a Y. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	834 /// Bits [5:4] specify the bits in the result to which the selected bits
1d019706d866 LLVM10 anatofuz parents: diff changeset	835 /// from operand \a Y are copied: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	836 /// 00: Copies the selected bits from \a Y to result bits [31:0]. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	837 /// 01: Copies the selected bits from \a Y to result bits [63:32]. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	838 /// 10: Copies the selected bits from \a Y to result bits [95:64]. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	839 /// 11: Copies the selected bits from \a Y to result bits [127:96]. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	840 /// Bits [3:0]: If any of these bits are set, the corresponding result
1d019706d866 LLVM10 anatofuz parents: diff changeset	841 /// element is cleared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	842 /// \returns A 128-bit vector of [4 x float] containing the copied
1d019706d866 LLVM10 anatofuz parents: diff changeset	843 /// single-precision floating point elements from the operands.
1d019706d866 LLVM10 anatofuz parents: diff changeset	844 #define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))
1d019706d866 LLVM10 anatofuz parents: diff changeset	845
1d019706d866 LLVM10 anatofuz parents: diff changeset	846 /// Extracts a 32-bit integer from a 128-bit vector of [4 x float] and
1d019706d866 LLVM10 anatofuz parents: diff changeset	847 /// returns it, using the immediate value parameter \a N as a selector.
1d019706d866 LLVM10 anatofuz parents: diff changeset	848 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	849 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	850 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	851 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	852 /// int _mm_extract_ps(__m128 X, const int N);
1d019706d866 LLVM10 anatofuz parents: diff changeset	853 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	854 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	855 /// This intrinsic corresponds to the <c> VEXTRACTPS / EXTRACTPS </c>
1d019706d866 LLVM10 anatofuz parents: diff changeset	856 /// instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	857 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	858 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	859 /// A 128-bit vector of [4 x float].
1d019706d866 LLVM10 anatofuz parents: diff changeset	860 /// \param N
1d019706d866 LLVM10 anatofuz parents: diff changeset	861 /// An immediate value. Bits [1:0] determine which bits from the argument
1d019706d866 LLVM10 anatofuz parents: diff changeset	862 /// \a X are extracted and returned: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	863 /// 00: Bits [31:0] of parameter \a X are returned. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	864 /// 01: Bits [63:32] of parameter \a X are returned. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	865 /// 10: Bits [95:64] of parameter \a X are returned. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	866 /// 11: Bits [127:96] of parameter \a X are returned.
1d019706d866 LLVM10 anatofuz parents: diff changeset	867 /// \returns A 32-bit integer containing the extracted 32 bits of float data.
1d019706d866 LLVM10 anatofuz parents: diff changeset	868 #define _mm_extract_ps(X, N) (__extension__ \
1d019706d866 LLVM10 anatofuz parents: diff changeset	869 ({ union { int __i; float __f; } __t; \
1d019706d866 LLVM10 anatofuz parents: diff changeset	870 __t.__f = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \
1d019706d866 LLVM10 anatofuz parents: diff changeset	871 __t.__i;}))
1d019706d866 LLVM10 anatofuz parents: diff changeset	872
1d019706d866 LLVM10 anatofuz parents: diff changeset	873 /* Miscellaneous insert and extract macros. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	874 /* Extract a single-precision float from X at index N into D. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	875 #define _MM_EXTRACT_FLOAT(D, X, N) \
1d019706d866 LLVM10 anatofuz parents: diff changeset	876 { (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); }
1d019706d866 LLVM10 anatofuz parents: diff changeset	877
1d019706d866 LLVM10 anatofuz parents: diff changeset	878 /* Or together the source index (X, bits [7:6]), the destination index (Y, bits [5:4]) and the zeroing bits (Z) to create
1d019706d866 LLVM10 anatofuz parents: diff changeset	879 an index suitable for _mm_insert_ps. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	880 #define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z))
1d019706d866 LLVM10 anatofuz parents: diff changeset	881
1d019706d866 LLVM10 anatofuz parents: diff changeset	882 /* Build a vector whose element 0 is the float at index N of X and whose elements 1-3 are zeroed (zero mask 0x0e). */
1d019706d866 LLVM10 anatofuz parents: diff changeset	883 #define _MM_PICK_OUT_PS(X, N) _mm_insert_ps (_mm_setzero_ps(), (X), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	884 _MM_MK_INSERTPS_NDX((N), 0, 0x0e))
1d019706d866 LLVM10 anatofuz parents: diff changeset	885
1d019706d866 LLVM10 anatofuz parents: diff changeset	886 /* Insert int into packed integer array at index. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	887 /// Constructs a 128-bit vector of [16 x i8] by first making a copy of
1d019706d866 LLVM10 anatofuz parents: diff changeset	888 /// the 128-bit integer vector parameter, and then inserting the lower 8 bits
1d019706d866 LLVM10 anatofuz parents: diff changeset	889 /// of an integer parameter \a I into an offset specified by the immediate
1d019706d866 LLVM10 anatofuz parents: diff changeset	890 /// value parameter \a N.
1d019706d866 LLVM10 anatofuz parents: diff changeset	891 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	892 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	893 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	894 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	895 /// __m128i _mm_insert_epi8(__m128i X, int I, const int N);
1d019706d866 LLVM10 anatofuz parents: diff changeset	896 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	897 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	898 /// This intrinsic corresponds to the <c> VPINSRB / PINSRB </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	899 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	900 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	901 /// A 128-bit integer vector of [16 x i8]. This vector is copied to the
1d019706d866 LLVM10 anatofuz parents: diff changeset	902 /// result and then one of the sixteen elements in the result vector is
1d019706d866 LLVM10 anatofuz parents: diff changeset	903 /// replaced by the lower 8 bits of \a I.
1d019706d866 LLVM10 anatofuz parents: diff changeset	904 /// \param I
1d019706d866 LLVM10 anatofuz parents: diff changeset	905 /// An integer. The lower 8 bits of this operand are written to the result
1d019706d866 LLVM10 anatofuz parents: diff changeset	906 /// beginning at the offset specified by \a N.
1d019706d866 LLVM10 anatofuz parents: diff changeset	907 /// \param N
1d019706d866 LLVM10 anatofuz parents: diff changeset	908 /// An immediate value. Bits [3:0] select the 8-bit element of the result
1d019706d866 LLVM10 anatofuz parents: diff changeset	909 /// into which the lower 8 bits of \a I are written: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	910 /// 0000: Bits [7:0] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	911 /// 0001: Bits [15:8] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	912 /// 0010: Bits [23:16] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	913 /// 0011: Bits [31:24] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	914 /// 0100: Bits [39:32] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	915 /// 0101: Bits [47:40] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	916 /// 0110: Bits [55:48] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	917 /// 0111: Bits [63:56] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	918 /// 1000: Bits [71:64] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	919 /// 1001: Bits [79:72] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	920 /// 1010: Bits [87:80] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	921 /// 1011: Bits [95:88] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	922 /// 1100: Bits [103:96] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	923 /// 1101: Bits [111:104] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	924 /// 1110: Bits [119:112] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	925 /// 1111: Bits [127:120] of the result are used for insertion.
1d019706d866 LLVM10 anatofuz parents: diff changeset	926 /// \returns A 128-bit integer vector containing the constructed values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	927 #define _mm_insert_epi8(X, I, N) \
1d019706d866 LLVM10 anatofuz parents: diff changeset	928 ((__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	929 (int)(I), (int)(N)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	930
1d019706d866 LLVM10 anatofuz parents: diff changeset	931 /// Constructs a 128-bit vector of [4 x i32] by first making a copy of
1d019706d866 LLVM10 anatofuz parents: diff changeset	932 /// the 128-bit integer vector parameter, and then inserting the 32-bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	933 /// integer parameter \a I at the offset specified by the immediate value
1d019706d866 LLVM10 anatofuz parents: diff changeset	934 /// parameter \a N.
1d019706d866 LLVM10 anatofuz parents: diff changeset	935 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	936 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	937 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	938 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	939 /// __m128i _mm_insert_epi32(__m128i X, int I, const int N);
1d019706d866 LLVM10 anatofuz parents: diff changeset	940 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	941 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	942 /// This intrinsic corresponds to the <c> VPINSRD / PINSRD </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	943 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	944 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	945 /// A 128-bit integer vector of [4 x i32]. This vector is copied to the
1d019706d866 LLVM10 anatofuz parents: diff changeset	946 /// result and then one of the four elements in the result vector is
1d019706d866 LLVM10 anatofuz parents: diff changeset	947 /// replaced by \a I.
1d019706d866 LLVM10 anatofuz parents: diff changeset	948 /// \param I
1d019706d866 LLVM10 anatofuz parents: diff changeset	949 /// A 32-bit integer that is written to the result beginning at the offset
1d019706d866 LLVM10 anatofuz parents: diff changeset	950 /// specified by \a N.
1d019706d866 LLVM10 anatofuz parents: diff changeset	951 /// \param N
1d019706d866 LLVM10 anatofuz parents: diff changeset	952 /// An immediate value. Bits [1:0] select the 32-bit element of the result
1d019706d866 LLVM10 anatofuz parents: diff changeset	953 /// into which the integer \a I is written: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	954 /// 00: Bits [31:0] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	955 /// 01: Bits [63:32] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	956 /// 10: Bits [95:64] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	957 /// 11: Bits [127:96] of the result are used for insertion.
1d019706d866 LLVM10 anatofuz parents: diff changeset	958 /// \returns A 128-bit integer vector containing the constructed values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	959 #define _mm_insert_epi32(X, I, N) \
1d019706d866 LLVM10 anatofuz parents: diff changeset	960 ((__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	961 (int)(I), (int)(N)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	962
1d019706d866 LLVM10 anatofuz parents: diff changeset	963 #ifdef __x86_64__
1d019706d866 LLVM10 anatofuz parents: diff changeset	964 /// Constructs a 128-bit vector of [2 x i64] by first making a copy of
1d019706d866 LLVM10 anatofuz parents: diff changeset	965 /// the 128-bit integer vector parameter, and then inserting the 64-bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	966 /// integer parameter \a I, using the immediate value parameter \a N as an
1d019706d866 LLVM10 anatofuz parents: diff changeset	967 /// insertion location selector.
1d019706d866 LLVM10 anatofuz parents: diff changeset	968 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	969 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	970 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	971 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	972 /// __m128i _mm_insert_epi64(__m128i X, long long I, const int N);
1d019706d866 LLVM10 anatofuz parents: diff changeset	973 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	974 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	975 /// This intrinsic corresponds to the <c> VPINSRQ / PINSRQ </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	976 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	977 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	978 /// A 128-bit integer vector of [2 x i64]. This vector is copied to the
1d019706d866 LLVM10 anatofuz parents: diff changeset	979 /// result and then one of the two elements in the result vector is replaced
1d019706d866 LLVM10 anatofuz parents: diff changeset	980 /// by \a I.
1d019706d866 LLVM10 anatofuz parents: diff changeset	981 /// \param I
1d019706d866 LLVM10 anatofuz parents: diff changeset	982 /// A 64-bit integer that is written to the result beginning at the offset
1d019706d866 LLVM10 anatofuz parents: diff changeset	983 /// specified by \a N.
1d019706d866 LLVM10 anatofuz parents: diff changeset	984 /// \param N
1d019706d866 LLVM10 anatofuz parents: diff changeset	985 /// An immediate value. Bit [0] selects the 64-bit element of the result
1d019706d866 LLVM10 anatofuz parents: diff changeset	986 /// into which the integer \a I is written: \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	987 /// 0: Bits [63:0] of the result are used for insertion. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	988 /// 1: Bits [127:64] of the result are used for insertion.
1d019706d866 LLVM10 anatofuz parents: diff changeset	989 /// \returns A 128-bit integer vector containing the constructed values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	990 #define _mm_insert_epi64(X, I, N) \
1d019706d866 LLVM10 anatofuz parents: diff changeset	991 ((__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	992 (long long)(I), (int)(N)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	993 #endif /* __x86_64__ */
1d019706d866 LLVM10 anatofuz parents: diff changeset	994
1d019706d866 LLVM10 anatofuz parents: diff changeset	995 /* Extract int from packed integer array at index. This returns the element
1d019706d866 LLVM10 anatofuz parents: diff changeset	996 * as a zero extended value, so it is unsigned.
1d019706d866 LLVM10 anatofuz parents: diff changeset	997 */
/// Extracts an 8-bit element from the 128-bit integer vector of
///    [16 x i8], using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_extract_epi8(__m128i X, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPEXTRB / PEXTRB </c> instruction.
///
/// \param X
///    A 128-bit integer vector.
/// \param N
///    An immediate value. Bits [3:0] specify which 8-bit vector element from
///    the argument \a X to extract and copy to the result. \n
///    0000: Bits [7:0] of the parameter \a X are extracted. \n
///    0001: Bits [15:8] of the parameter \a X are extracted. \n
///    0010: Bits [23:16] of the parameter \a X are extracted. \n
///    0011: Bits [31:24] of the parameter \a X are extracted. \n
///    0100: Bits [39:32] of the parameter \a X are extracted. \n
///    0101: Bits [47:40] of the parameter \a X are extracted. \n
///    0110: Bits [55:48] of the parameter \a X are extracted. \n
///    0111: Bits [63:56] of the parameter \a X are extracted. \n
///    1000: Bits [71:64] of the parameter \a X are extracted. \n
///    1001: Bits [79:72] of the parameter \a X are extracted. \n
///    1010: Bits [87:80] of the parameter \a X are extracted. \n
///    1011: Bits [95:88] of the parameter \a X are extracted. \n
///    1100: Bits [103:96] of the parameter \a X are extracted. \n
///    1101: Bits [111:104] of the parameter \a X are extracted. \n
///    1110: Bits [119:112] of the parameter \a X are extracted. \n
///    1111: Bits [127:120] of the parameter \a X are extracted.
/// \returns An integer holding the selected element zero-extended: its lower
///    8 bits are taken from the 128-bit integer vector parameter and the
///    remaining bits are zeros.
#define _mm_extract_epi8(X, N) \
  ((int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \
                                                    (int)(N)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	1036
/// Extracts a 32-bit element from the 128-bit integer vector of
///    [4 x i32], using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_extract_epi32(__m128i X, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPEXTRD / PEXTRD </c> instruction.
///
/// \param X
///    A 128-bit integer vector.
/// \param N
///    An immediate value. Bits [1:0] specify which 32-bit vector element from
///    the argument \a X to extract and copy to the result. \n
///    00: Bits [31:0] of the parameter \a X are extracted. \n
///    01: Bits [63:32] of the parameter \a X are extracted. \n
///    10: Bits [95:64] of the parameter \a X are extracted. \n
///    11: Bits [127:96] of the parameter \a X are extracted.
/// \returns An integer holding the 32-bit element selected from the 128-bit
///    integer vector parameter.
#define _mm_extract_epi32(X, N) \
  ((int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	1061
#ifdef __x86_64__
/// Extracts a 64-bit element from the 128-bit integer vector of
///    [2 x i64], using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// long long _mm_extract_epi64(__m128i X, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPEXTRQ / PEXTRQ </c> instruction.
///
/// \param X
///    A 128-bit integer vector.
/// \param N
///    An immediate value. Bit [0] specifies which 64-bit vector element from
///    the argument \a X to return. \n
///    0: Bits [63:0] are returned. \n
///    1: Bits [127:64] are returned.
/// \returns A 64-bit integer.
#define _mm_extract_epi64(X, N) \
  ((long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N)))
#endif /* __x86_64__ */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1085
1d019706d866 LLVM10 anatofuz parents: diff changeset	1086 /* SSE4 128-bit Packed Integer Comparisons. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1087 /// Tests whether the specified bits in a 128-bit integer vector are all
1d019706d866 LLVM10 anatofuz parents: diff changeset	1088 /// zeros.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1089 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1090 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1091 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1092 /// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1093 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1094 /// \param __M
1d019706d866 LLVM10 anatofuz parents: diff changeset	1095 /// A 128-bit integer vector containing the bits to be tested.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1096 /// \param __V
1d019706d866 LLVM10 anatofuz parents: diff changeset	1097 /// A 128-bit integer vector selecting which bits to test in operand \a __M.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1098 /// \returns TRUE if the specified bits are all zeros; FALSE otherwise.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1099 static __inline__ int __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1100 _mm_testz_si128(__m128i __M, __m128i __V)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1101 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1102 return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1103 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1104
1d019706d866 LLVM10 anatofuz parents: diff changeset	1105 /// Tests whether the specified bits in a 128-bit integer vector are all
1d019706d866 LLVM10 anatofuz parents: diff changeset	1106 /// ones.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1107 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1108 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1109 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1110 /// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1111 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1112 /// \param __M
1d019706d866 LLVM10 anatofuz parents: diff changeset	1113 /// A 128-bit integer vector containing the bits to be tested.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1114 /// \param __V
1d019706d866 LLVM10 anatofuz parents: diff changeset	1115 /// A 128-bit integer vector selecting which bits to test in operand \a __M.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1116 /// \returns TRUE if the specified bits are all ones; FALSE otherwise.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1117 static __inline__ int __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1118 _mm_testc_si128(__m128i __M, __m128i __V)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1119 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1120 return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1121 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1122
1d019706d866 LLVM10 anatofuz parents: diff changeset	1123 /// Tests whether the specified bits in a 128-bit integer vector are
1d019706d866 LLVM10 anatofuz parents: diff changeset	1124 /// neither all zeros nor all ones.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1125 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1126 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1127 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1128 /// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1129 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1130 /// \param __M
1d019706d866 LLVM10 anatofuz parents: diff changeset	1131 /// A 128-bit integer vector containing the bits to be tested.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1132 /// \param __V
1d019706d866 LLVM10 anatofuz parents: diff changeset	1133 /// A 128-bit integer vector selecting which bits to test in operand \a __M.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1134 /// \returns TRUE if the specified bits are neither all zeros nor all ones;
1d019706d866 LLVM10 anatofuz parents: diff changeset	1135 /// FALSE otherwise.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1136 static __inline__ int __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1137 _mm_testnzc_si128(__m128i __M, __m128i __V)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1138 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1139 return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1140 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1141
/// Tests whether the specified bits in a 128-bit integer vector are all
///    ones.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_test_all_ones(__m128i V);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
///
/// The vector is tested against a constant all-ones mask, so the argument
/// \a V is evaluated exactly once.
///
/// \param V
///    A 128-bit integer vector containing the bits to be tested.
/// \returns TRUE if the bits specified in the operand are all set to 1; FALSE
///    otherwise.
#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_set1_epi32(-1))
1d019706d866 LLVM10 anatofuz parents: diff changeset	1158
/// Checks whether the selected bits of a 128-bit integer vector are a
///    mixture, that is, neither all zeros nor all ones.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_test_mix_ones_zeros(__m128i M, __m128i V);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
///
/// \param M
///    A 128-bit integer vector holding the bits to examine.
/// \param V
///    A 128-bit integer vector that selects which bits of \a M are tested.
/// \returns TRUE when the selected bits are neither all zeros nor all ones,
///    FALSE otherwise.
#define _mm_test_mix_ones_zeros(M, V) \
  _mm_testnzc_si128((M), (V))
1d019706d866 LLVM10 anatofuz parents: diff changeset	1177
/// Checks whether every selected bit of a 128-bit integer vector is zero.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_test_all_zeros(__m128i M, __m128i V);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
///
/// \param M
///    A 128-bit integer vector holding the bits to examine.
/// \param V
///    A 128-bit integer vector that selects which bits of \a M are tested.
/// \returns TRUE when all of the selected bits are zeros, FALSE otherwise.
#define _mm_test_all_zeros(M, V) \
  _mm_testz_si128((M), (V))
1d019706d866 LLVM10 anatofuz parents: diff changeset	1195
1d019706d866 LLVM10 anatofuz parents: diff changeset	1196 /* SSE4 64-bit Packed Integer Comparisons. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1197 /// Compares each of the corresponding 64-bit values of the 128-bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	1198 /// integer vectors for equality.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1199 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1200 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1201 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1202 /// This intrinsic corresponds to the <c> VPCMPEQQ / PCMPEQQ </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1203 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1204 /// \param __V1
1d019706d866 LLVM10 anatofuz parents: diff changeset	1205 /// A 128-bit integer vector.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1206 /// \param __V2
1d019706d866 LLVM10 anatofuz parents: diff changeset	1207 /// A 128-bit integer vector.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1208 /// \returns A 128-bit integer vector containing the comparison results.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1209 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1210 _mm_cmpeq_epi64(__m128i __V1, __m128i __V2)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1211 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1212 return (__m128i)((__v2di)__V1 == (__v2di)__V2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1213 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1214
1d019706d866 LLVM10 anatofuz parents: diff changeset	1215 /* SSE4 Packed Integer Sign-Extension. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1216 /// Sign-extends each of the lower eight 8-bit integer elements of a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1217 /// 128-bit vector of [16 x i8] to 16-bit values and returns them in a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1218 /// 128-bit vector of [8 x i16]. The upper eight elements of the input vector
1d019706d866 LLVM10 anatofuz parents: diff changeset	1219 /// are unused.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1220 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1221 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1222 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1223 /// This intrinsic corresponds to the <c> VPMOVSXBW / PMOVSXBW </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1224 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1225 /// \param __V
1d019706d866 LLVM10 anatofuz parents: diff changeset	1226 /// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are sign-
1d019706d866 LLVM10 anatofuz parents: diff changeset	1227 /// extended to 16-bit values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1228 /// \returns A 128-bit vector of [8 x i16] containing the sign-extended values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1229 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1230 _mm_cvtepi8_epi16(__m128i __V)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1231 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1232 /* This function always performs a signed extension, but __v16qi is a char
1d019706d866 LLVM10 anatofuz parents: diff changeset	1233 which may be signed or unsigned, so use __v16qs. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1234 return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1235 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1236
1d019706d866 LLVM10 anatofuz parents: diff changeset	1237 /// Sign-extends each of the lower four 8-bit integer elements of a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1238 /// 128-bit vector of [16 x i8] to 32-bit values and returns them in a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1239 /// 128-bit vector of [4 x i32]. The upper twelve elements of the input
1d019706d866 LLVM10 anatofuz parents: diff changeset	1240 /// vector are unused.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1241 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1242 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1243 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1244 /// This intrinsic corresponds to the <c> VPMOVSXBD / PMOVSXBD </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1245 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1246 /// \param __V
1d019706d866 LLVM10 anatofuz parents: diff changeset	1247 /// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are
1d019706d866 LLVM10 anatofuz parents: diff changeset	1248 /// sign-extended to 32-bit values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1249 /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1250 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1251 _mm_cvtepi8_epi32(__m128i __V)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1252 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1253 /* This function always performs a signed extension, but __v16qi is a char
1d019706d866 LLVM10 anatofuz parents: diff changeset	1254 which may be signed or unsigned, so use __v16qs. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1255 return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1256 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1257
1d019706d866 LLVM10 anatofuz parents: diff changeset	1258 /// Sign-extends each of the lower two 8-bit integer elements of a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1259 /// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in
1d019706d866 LLVM10 anatofuz parents: diff changeset	1260 /// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input
1d019706d866 LLVM10 anatofuz parents: diff changeset	1261 /// vector are unused.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1262 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1263 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1264 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1265 /// This intrinsic corresponds to the <c> VPMOVSXBQ / PMOVSXBQ </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1266 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1267 /// \param __V
1d019706d866 LLVM10 anatofuz parents: diff changeset	1268 /// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are
1d019706d866 LLVM10 anatofuz parents: diff changeset	1269 /// sign-extended to 64-bit values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1270 /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1271 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1272 _mm_cvtepi8_epi64(__m128i __V)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1273 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1274 /* This function always performs a signed extension, but __v16qi is a char
1d019706d866 LLVM10 anatofuz parents: diff changeset	1275 which may be signed or unsigned, so use __v16qs. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1276 return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1277 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1278
1d019706d866 LLVM10 anatofuz parents: diff changeset	1279 /// Sign-extends each of the lower four 16-bit integer elements of a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1280 /// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in
1d019706d866 LLVM10 anatofuz parents: diff changeset	1281 /// a 128-bit vector of [4 x i32]. The upper four elements of the input
1d019706d866 LLVM10 anatofuz parents: diff changeset	1282 /// vector are unused.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1283 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1284 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1285 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1286 /// This intrinsic corresponds to the <c> VPMOVSXWD / PMOVSXWD </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1287 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1288 /// \param __V
1d019706d866 LLVM10 anatofuz parents: diff changeset	1289 /// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are
1d019706d866 LLVM10 anatofuz parents: diff changeset	1290 /// sign-extended to 32-bit values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1291 /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1292 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1293 _mm_cvtepi16_epi32(__m128i __V)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1294 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1295 return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1296 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1297
1d019706d866 LLVM10 anatofuz parents: diff changeset	1298 /// Sign-extends each of the lower two 16-bit integer elements of a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1299 /// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in
1d019706d866 LLVM10 anatofuz parents: diff changeset	1300 /// a 128-bit vector of [2 x i64]. The upper six elements of the input
1d019706d866 LLVM10 anatofuz parents: diff changeset	1301 /// vector are unused.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1302 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1303 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1304 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1305 /// This intrinsic corresponds to the <c> VPMOVSXWQ / PMOVSXWQ </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1306 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1307 /// \param __V
1d019706d866 LLVM10 anatofuz parents: diff changeset	1308 /// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are
1d019706d866 LLVM10 anatofuz parents: diff changeset	1309 /// sign-extended to 64-bit values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1310 /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1311 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1312 _mm_cvtepi16_epi64(__m128i __V)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1313 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1314 return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1315 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1316
1d019706d866 LLVM10 anatofuz parents: diff changeset	1317 /// Sign-extends each of the lower two 32-bit integer elements of a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1318 /// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in
1d019706d866 LLVM10 anatofuz parents: diff changeset	1319 /// a 128-bit vector of [2 x i64]. The upper two elements of the input vector
1d019706d866 LLVM10 anatofuz parents: diff changeset	1320 /// are unused.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1321 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1322 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1323 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1324 /// This intrinsic corresponds to the <c> VPMOVSXDQ / PMOVSXDQ </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1325 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1326 /// \param __V
1d019706d866 LLVM10 anatofuz parents: diff changeset	1327 /// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are
1d019706d866 LLVM10 anatofuz parents: diff changeset	1328 /// sign-extended to 64-bit values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1329 /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1330 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1331 _mm_cvtepi32_epi64(__m128i __V)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1332 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1333 return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1334 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1335
1d019706d866 LLVM10 anatofuz parents: diff changeset	1336 /* SSE4 Packed Integer Zero-Extension. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1337 /// Zero-extends each of the lower eight 8-bit integer elements of a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1338 /// 128-bit vector of [16 x i8] to 16-bit values and returns them in a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1339 /// 128-bit vector of [8 x i16]. The upper eight elements of the input vector
1d019706d866 LLVM10 anatofuz parents: diff changeset	1340 /// are unused.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1341 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1342 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1343 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1344 /// This intrinsic corresponds to the <c> VPMOVZXBW / PMOVZXBW </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1345 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1346 /// \param __V
1d019706d866 LLVM10 anatofuz parents: diff changeset	1347 /// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are
1d019706d866 LLVM10 anatofuz parents: diff changeset	1348 /// zero-extended to 16-bit values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1349 /// \returns A 128-bit vector of [8 x i16] containing the zero-extended values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1350 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1351 _mm_cvtepu8_epi16(__m128i __V)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1352 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1353 return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi); /* take elements 0-7, then convert each to the __v8hi element type; NOTE(review): zero-extension relies on __v16qu being unsigned -- declared elsewhere, confirm */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1354 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1355
1d019706d866 LLVM10 anatofuz parents: diff changeset	1356 /// Zero-extends each of the lower four 8-bit integer elements of a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1357 /// 128-bit vector of [16 x i8] to 32-bit values and returns them in a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1358 /// 128-bit vector of [4 x i32]. The upper twelve elements of the input
1d019706d866 LLVM10 anatofuz parents: diff changeset	1359 /// vector are unused.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1360 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1361 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1362 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1363 /// This intrinsic corresponds to the <c> VPMOVZXBD / PMOVZXBD </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1364 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1365 /// \param __V
1d019706d866 LLVM10 anatofuz parents: diff changeset	1366 /// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are
1d019706d866 LLVM10 anatofuz parents: diff changeset	1367 /// zero-extended to 32-bit values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1368 /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1369 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1370 _mm_cvtepu8_epi32(__m128i __V)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1371 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1372 return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si); /* take elements 0-3, then convert each to the __v4si element type; NOTE(review): zero-extension relies on __v16qu being unsigned -- declared elsewhere, confirm */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1373 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1374
1d019706d866 LLVM10 anatofuz parents: diff changeset	1375 /// Zero-extends each of the lower two 8-bit integer elements of a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1376 /// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in
1d019706d866 LLVM10 anatofuz parents: diff changeset	1377 /// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input
1d019706d866 LLVM10 anatofuz parents: diff changeset	1378 /// vector are unused.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1379 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1380 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1381 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1382 /// This intrinsic corresponds to the <c> VPMOVZXBQ / PMOVZXBQ </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1383 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1384 /// \param __V
1d019706d866 LLVM10 anatofuz parents: diff changeset	1385 /// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are
1d019706d866 LLVM10 anatofuz parents: diff changeset	1386 /// zero-extended to 64-bit values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1387 /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1388 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1389 _mm_cvtepu8_epi64(__m128i __V)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1390 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1391 return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di); /* take elements 0-1, then convert each to the __v2di element type; NOTE(review): zero-extension relies on __v16qu being unsigned -- declared elsewhere, confirm */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1392 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1393
1d019706d866 LLVM10 anatofuz parents: diff changeset	1394 /// Zero-extends each of the lower four 16-bit integer elements of a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1395 /// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in
1d019706d866 LLVM10 anatofuz parents: diff changeset	1396 /// a 128-bit vector of [4 x i32]. The upper four elements of the input
1d019706d866 LLVM10 anatofuz parents: diff changeset	1397 /// vector are unused.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1398 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1399 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1400 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1401 /// This intrinsic corresponds to the <c> VPMOVZXWD / PMOVZXWD </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1402 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1403 /// \param __V
1d019706d866 LLVM10 anatofuz parents: diff changeset	1404 /// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are
1d019706d866 LLVM10 anatofuz parents: diff changeset	1405 /// zero-extended to 32-bit values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1406 /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1407 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1408 _mm_cvtepu16_epi32(__m128i __V)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1409 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1410 return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si); /* take elements 0-3, then convert each to the __v4si element type; NOTE(review): zero-extension relies on __v8hu being unsigned -- declared elsewhere, confirm */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1411 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1412
1d019706d866 LLVM10 anatofuz parents: diff changeset	1413 /// Zero-extends each of the lower two 16-bit integer elements of a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1414 /// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in
1d019706d866 LLVM10 anatofuz parents: diff changeset	1415 /// a 128-bit vector of [2 x i64]. The upper six elements of the input vector
1d019706d866 LLVM10 anatofuz parents: diff changeset	1416 /// are unused.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1417 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1418 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1419 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1420 /// This intrinsic corresponds to the <c> VPMOVZXWQ / PMOVZXWQ </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1421 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1422 /// \param __V
1d019706d866 LLVM10 anatofuz parents: diff changeset	1423 /// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are
1d019706d866 LLVM10 anatofuz parents: diff changeset	1424 /// zero-extended to 64-bit values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1425 /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1426 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1427 _mm_cvtepu16_epi64(__m128i __V)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1428 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1429 return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di); /* take elements 0-1, then convert each to the __v2di element type; NOTE(review): zero-extension relies on __v8hu being unsigned -- declared elsewhere, confirm */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1430 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1431
1d019706d866 LLVM10 anatofuz parents: diff changeset	1432 /// Zero-extends each of the lower two 32-bit integer elements of a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1433 /// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in
1d019706d866 LLVM10 anatofuz parents: diff changeset	1434 /// a 128-bit vector of [2 x i64]. The upper two elements of the input vector
1d019706d866 LLVM10 anatofuz parents: diff changeset	1435 /// are unused.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1436 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1437 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1438 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1439 /// This intrinsic corresponds to the <c> VPMOVZXDQ / PMOVZXDQ </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1440 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1441 /// \param __V
1d019706d866 LLVM10 anatofuz parents: diff changeset	1442 /// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are
1d019706d866 LLVM10 anatofuz parents: diff changeset	1443 /// zero-extended to 64-bit values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1444 /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1445 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1446 _mm_cvtepu32_epi64(__m128i __V)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1447 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1448 return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di); /* take elements 0-1, then convert each to the __v2di element type; NOTE(review): zero-extension relies on __v4su being unsigned -- declared elsewhere, confirm */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1449 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1450
1d019706d866 LLVM10 anatofuz parents: diff changeset	1451 /* SSE4 Pack with Unsigned Saturation. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1452 /// Converts 32-bit signed integers from both 128-bit integer vector
1d019706d866 LLVM10 anatofuz parents: diff changeset	1453 /// operands into 16-bit unsigned integers, and returns the packed result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1454 /// Values greater than 0xFFFF are saturated to 0xFFFF. Values less than
1d019706d866 LLVM10 anatofuz parents: diff changeset	1455 /// 0x0000 are saturated to 0x0000.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1456 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1457 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1458 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1459 /// This intrinsic corresponds to the <c> VPACKUSDW / PACKUSDW </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1460 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1461 /// \param __V1
1d019706d866 LLVM10 anatofuz parents: diff changeset	1462 /// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1463 /// signed integer and is converted to a 16-bit unsigned integer with
1d019706d866 LLVM10 anatofuz parents: diff changeset	1464 /// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values
1d019706d866 LLVM10 anatofuz parents: diff changeset	1465 /// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values
1d019706d866 LLVM10 anatofuz parents: diff changeset	1466 /// are written to the lower 64 bits of the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1467 /// \param __V2
1d019706d866 LLVM10 anatofuz parents: diff changeset	1468 /// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a
1d019706d866 LLVM10 anatofuz parents: diff changeset	1469 /// signed integer and is converted to a 16-bit unsigned integer with
1d019706d866 LLVM10 anatofuz parents: diff changeset	1470 /// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values
1d019706d866 LLVM10 anatofuz parents: diff changeset	1471 /// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values
1d019706d866 LLVM10 anatofuz parents: diff changeset	1472 /// are written to the upper 64 bits of the result.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1473 /// \returns A 128-bit vector of [8 x i16] containing the converted values.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1474 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1475 _mm_packus_epi32(__m128i __V1, __m128i __V2)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1476 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1477 return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1478 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1479
1d019706d866 LLVM10 anatofuz parents: diff changeset	1480 /* SSE4 Multiple Packed Sums of Absolute Difference. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1481 /// Subtracts 8-bit unsigned integer values and computes the absolute
1d019706d866 LLVM10 anatofuz parents: diff changeset	1482 /// values of the differences to the corresponding bits in the destination.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1483 /// Then sums of the absolute differences are returned according to the bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	1484 /// fields in the immediate operand.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1485 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1486 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1487 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1488 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	1489 /// __m128i _mm_mpsadbw_epu8(__m128i X, __m128i Y, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1490 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	1491 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1492 /// This intrinsic corresponds to the <c> VMPSADBW / MPSADBW </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1493 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1494 /// \param X
1d019706d866 LLVM10 anatofuz parents: diff changeset	1495 /// A 128-bit vector of [16 x i8].
1d019706d866 LLVM10 anatofuz parents: diff changeset	1496 /// \param Y
1d019706d866 LLVM10 anatofuz parents: diff changeset	1497 /// A 128-bit vector of [16 x i8].
1d019706d866 LLVM10 anatofuz parents: diff changeset	1498 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	1499 /// An 8-bit immediate operand specifying how the absolute differences are to
1d019706d866 LLVM10 anatofuz parents: diff changeset	1500 /// be calculated, according to the following algorithm:
1d019706d866 LLVM10 anatofuz parents: diff changeset	1501 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	1502 /// // M2 represents bit 2 of the immediate operand
1d019706d866 LLVM10 anatofuz parents: diff changeset	1503 /// // M10 represents bits [1:0] of the immediate operand
1d019706d866 LLVM10 anatofuz parents: diff changeset	1504 /// i = M2 * 4;
1d019706d866 LLVM10 anatofuz parents: diff changeset	1505 /// j = M10 * 4;
1d019706d866 LLVM10 anatofuz parents: diff changeset	1506 /// for (k = 0; k < 8; k = k + 1) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1507 /// d0 = abs(X[i + k + 0] - Y[j + 0]);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1508 /// d1 = abs(X[i + k + 1] - Y[j + 1]);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1509 /// d2 = abs(X[i + k + 2] - Y[j + 2]);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1510 /// d3 = abs(X[i + k + 3] - Y[j + 3]);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1511 /// r[k] = d0 + d1 + d2 + d3;
1d019706d866 LLVM10 anatofuz parents: diff changeset	1512 /// }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1513 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	1514 /// \returns A 128-bit integer vector containing the sums of the sets of
1d019706d866 LLVM10 anatofuz parents: diff changeset	1515 /// absolute differences between both operands.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1516 #define _mm_mpsadbw_epu8(X, Y, M) \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1517 ((__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1518 (__v16qi)(__m128i)(Y), (M))) /* outer parentheses keep the cast bound to the call when the macro is followed by a postfix operator or used as a sizeof operand */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1519
1d019706d866 LLVM10 anatofuz parents: diff changeset	1520 /// Finds the minimum unsigned 16-bit element in the input 128-bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	1521 /// vector of [8 x u16] and returns it along with its index.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1522 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1523 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1524 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1525 /// This intrinsic corresponds to the <c> VPHMINPOSUW / PHMINPOSUW </c>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1526 /// instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1527 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1528 /// \param __V
1d019706d866 LLVM10 anatofuz parents: diff changeset	1529 /// A 128-bit vector of [8 x u16].
1d019706d866 LLVM10 anatofuz parents: diff changeset	1530 /// \returns A 128-bit value where bits [15:0] contain the minimum value found
1d019706d866 LLVM10 anatofuz parents: diff changeset	1531 /// in parameter \a __V, bits [18:16] contain the index of the minimum value
1d019706d866 LLVM10 anatofuz parents: diff changeset	1532 /// and the remaining bits are set to 0.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1533 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1534 _mm_minpos_epu16(__m128i __V)
1d019706d866 LLVM10 anatofuz parents: diff changeset	1535 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	1536 return (__m128i) __builtin_ia32_phminposuw128((__v8hi)__V);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1537 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	1538
1d019706d866 LLVM10 anatofuz parents: diff changeset	1539 /* Handle the sse4.2 definitions here. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1540
1d019706d866 LLVM10 anatofuz parents: diff changeset	1541 /* These definitions are normally in nmmintrin.h, but gcc puts them in here
1d019706d866 LLVM10 anatofuz parents: diff changeset	1542 so we'll do the same. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1543
1d019706d866 LLVM10 anatofuz parents: diff changeset	1544 #undef __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	1545 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
1d019706d866 LLVM10 anatofuz parents: diff changeset	1546
1d019706d866 LLVM10 anatofuz parents: diff changeset	1547 /* These specify the type of data that we're comparing. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1548 #define _SIDD_UBYTE_OPS 0x00
1d019706d866 LLVM10 anatofuz parents: diff changeset	1549 #define _SIDD_UWORD_OPS 0x01
1d019706d866 LLVM10 anatofuz parents: diff changeset	1550 #define _SIDD_SBYTE_OPS 0x02
1d019706d866 LLVM10 anatofuz parents: diff changeset	1551 #define _SIDD_SWORD_OPS 0x03
1d019706d866 LLVM10 anatofuz parents: diff changeset	1552
1d019706d866 LLVM10 anatofuz parents: diff changeset	1553 /* These specify the type of comparison operation. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1554 #define _SIDD_CMP_EQUAL_ANY 0x00
1d019706d866 LLVM10 anatofuz parents: diff changeset	1555 #define _SIDD_CMP_RANGES 0x04
1d019706d866 LLVM10 anatofuz parents: diff changeset	1556 #define _SIDD_CMP_EQUAL_EACH 0x08
1d019706d866 LLVM10 anatofuz parents: diff changeset	1557 #define _SIDD_CMP_EQUAL_ORDERED 0x0c
1d019706d866 LLVM10 anatofuz parents: diff changeset	1558
1d019706d866 LLVM10 anatofuz parents: diff changeset	1559 /* These macros specify the polarity of the operation. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1560 #define _SIDD_POSITIVE_POLARITY 0x00
1d019706d866 LLVM10 anatofuz parents: diff changeset	1561 #define _SIDD_NEGATIVE_POLARITY 0x10
1d019706d866 LLVM10 anatofuz parents: diff changeset	1562 #define _SIDD_MASKED_POSITIVE_POLARITY 0x20
1d019706d866 LLVM10 anatofuz parents: diff changeset	1563 #define _SIDD_MASKED_NEGATIVE_POLARITY 0x30
1d019706d866 LLVM10 anatofuz parents: diff changeset	1564
1d019706d866 LLVM10 anatofuz parents: diff changeset	1565 /* These macros are used in _mm_cmpXstri() to specify the return. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1566 #define _SIDD_LEAST_SIGNIFICANT 0x00
1d019706d866 LLVM10 anatofuz parents: diff changeset	1567 #define _SIDD_MOST_SIGNIFICANT 0x40
1d019706d866 LLVM10 anatofuz parents: diff changeset	1568
1d019706d866 LLVM10 anatofuz parents: diff changeset	1569 /* These macros are used in _mm_cmpXstrm() to specify the return. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1570 #define _SIDD_BIT_MASK 0x00
1d019706d866 LLVM10 anatofuz parents: diff changeset	1571 #define _SIDD_UNIT_MASK 0x40
1d019706d866 LLVM10 anatofuz parents: diff changeset	1572
1d019706d866 LLVM10 anatofuz parents: diff changeset	1573 /* SSE4.2 Packed Comparison Intrinsics. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1574 /// Uses the immediate operand \a M to perform a comparison of string
1d019706d866 LLVM10 anatofuz parents: diff changeset	1575 /// data with implicitly defined lengths that is contained in source operands
1d019706d866 LLVM10 anatofuz parents: diff changeset	1576 /// \a A and \a B. Returns a 128-bit integer vector representing the result
1d019706d866 LLVM10 anatofuz parents: diff changeset	1577 /// mask of the comparison.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1578 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1579 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1580 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1581 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	1582 /// __m128i _mm_cmpistrm(__m128i A, __m128i B, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1583 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	1584 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1585 /// This intrinsic corresponds to the <c> VPCMPISTRM / PCMPISTRM </c>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1586 /// instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1587 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1588 /// \param A
1d019706d866 LLVM10 anatofuz parents: diff changeset	1589 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	1590 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1591 /// \param B
1d019706d866 LLVM10 anatofuz parents: diff changeset	1592 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	1593 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1594 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	1595 /// An 8-bit immediate operand specifying whether the characters are bytes or
1d019706d866 LLVM10 anatofuz parents: diff changeset	1596 /// words, the type of comparison to perform, and the format of the return
1d019706d866 LLVM10 anatofuz parents: diff changeset	1597 /// value. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1598 /// Bits [1:0]: Determine source data format. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1599 /// 00: 16 unsigned bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1600 /// 01: 8 unsigned words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1601 /// 10: 16 signed bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1602 /// 11: 8 signed words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1603 /// Bits [3:2]: Determine comparison type and aggregation method. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1604 /// 00: Subset: Each character in \a B is compared for equality with all
1d019706d866 LLVM10 anatofuz parents: diff changeset	1605 /// the characters in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1606 /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1d019706d866 LLVM10 anatofuz parents: diff changeset	1607 /// basis is greater than or equal for even-indexed elements in \a A,
1d019706d866 LLVM10 anatofuz parents: diff changeset	1608 /// and less than or equal for odd-indexed elements in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1609 /// 10: Match: Compare each pair of corresponding characters in \a A and
1d019706d866 LLVM10 anatofuz parents: diff changeset	1610 /// \a B for equality. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1611 /// 11: Substring: Search \a B for substring matches of \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1612 /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	1613 /// mask of the comparison results. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1614 /// 00: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1615 /// 01: Negate the bit mask. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1616 /// 10: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1617 /// 11: Negate the bit mask only for bits with an index less than or equal
1d019706d866 LLVM10 anatofuz parents: diff changeset	1618 /// to the size of \a A or \a B. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1619 /// Bit [6]: Determines whether the result is zero-extended or expanded to 16
1d019706d866 LLVM10 anatofuz parents: diff changeset	1620 /// bytes. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1621 /// 0: The result is zero-extended to 16 bytes. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1622 /// 1: The result is expanded to 16 bytes (this expansion is performed by
1d019706d866 LLVM10 anatofuz parents: diff changeset	1623 /// repeating each bit 8 or 16 times).
1d019706d866 LLVM10 anatofuz parents: diff changeset	1624 /// \returns A 128-bit integer vector representing the result mask of
1d019706d866 LLVM10 anatofuz parents: diff changeset	1625 /// the comparison.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1626 #define _mm_cmpistrm(A, B, M) \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1627 ((__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1628 (__v16qi)(__m128i)(B), (int)(M))) /* outer parentheses keep the cast bound to the call when the macro is followed by a postfix operator or used as a sizeof operand */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1629
1d019706d866 LLVM10 anatofuz parents: diff changeset	1630 /// Uses the immediate operand \a M to perform a comparison of string
1d019706d866 LLVM10 anatofuz parents: diff changeset	1631 /// data with implicitly defined lengths that is contained in source operands
1d019706d866 LLVM10 anatofuz parents: diff changeset	1632 /// \a A and \a B. Returns an integer representing the result index of the
1d019706d866 LLVM10 anatofuz parents: diff changeset	1633 /// comparison.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1634 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1635 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1636 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1637 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	1638 /// int _mm_cmpistri(__m128i A, __m128i B, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1639 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	1640 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1641 /// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1642 /// instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1643 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1644 /// \param A
1d019706d866 LLVM10 anatofuz parents: diff changeset	1645 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	1646 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1647 /// \param B
1d019706d866 LLVM10 anatofuz parents: diff changeset	1648 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	1649 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1650 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	1651 /// An 8-bit immediate operand specifying whether the characters are bytes or
1d019706d866 LLVM10 anatofuz parents: diff changeset	1652 /// words, the type of comparison to perform, and the format of the return
1d019706d866 LLVM10 anatofuz parents: diff changeset	1653 /// value. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1654 /// Bits [1:0]: Determine source data format. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1655 /// 00: 16 unsigned bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1656 /// 01: 8 unsigned words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1657 /// 10: 16 signed bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1658 /// 11: 8 signed words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1659 /// Bits [3:2]: Determine comparison type and aggregation method. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1660 /// 00: Subset: Each character in \a B is compared for equality with all
1d019706d866 LLVM10 anatofuz parents: diff changeset	1661 /// the characters in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1662 /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1d019706d866 LLVM10 anatofuz parents: diff changeset	1663 /// basis is greater than or equal for even-indexed elements in \a A,
1d019706d866 LLVM10 anatofuz parents: diff changeset	1664 /// and less than or equal for odd-indexed elements in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1665 /// 10: Match: Compare each pair of corresponding characters in \a A and
1d019706d866 LLVM10 anatofuz parents: diff changeset	1666 /// \a B for equality. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1667 /// 11: Substring: Search \a B for substring matches of \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1668 /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	1669 /// mask of the comparison results. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1670 /// 00: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1671 /// 01: Negate the bit mask. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1672 /// 10: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1673 /// 11: Negate the bit mask only for bits with an index less than or equal
1d019706d866 LLVM10 anatofuz parents: diff changeset	1674 /// to the size of \a A or \a B. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1675 /// Bit [6]: Determines whether the index of the lowest set bit or the
1d019706d866 LLVM10 anatofuz parents: diff changeset	1676 /// highest set bit is returned. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1677 /// 0: The index of the least significant set bit. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1678 /// 1: The index of the most significant set bit. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1679 /// \returns Returns an integer representing the result index of the comparison.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1680 #define _mm_cmpistri(A, B, M) /* Parenthesized so the expansion is a single primary expression. */ \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1681 ((int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1682 (__v16qi)(__m128i)(B), (int)(M)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	1683
1d019706d866 LLVM10 anatofuz parents: diff changeset	1684 /// Uses the immediate operand \a M to perform a comparison of string
1d019706d866 LLVM10 anatofuz parents: diff changeset	1685 /// data with explicitly defined lengths that is contained in source operands
1d019706d866 LLVM10 anatofuz parents: diff changeset	1686 /// \a A and \a B. Returns a 128-bit integer vector representing the result
1d019706d866 LLVM10 anatofuz parents: diff changeset	1687 /// mask of the comparison.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1688 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1689 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1690 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1691 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	1692 /// __m128i _mm_cmpestrm(__m128i A, int LA, __m128i B, int LB, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1693 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	1694 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1695 /// This intrinsic corresponds to the <c> VPCMPESTRM / PCMPESTRM </c>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1696 /// instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1697 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1698 /// \param A
1d019706d866 LLVM10 anatofuz parents: diff changeset	1699 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	1700 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1701 /// \param LA
1d019706d866 LLVM10 anatofuz parents: diff changeset	1702 /// An integer that specifies the length of the string in \a A.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1703 /// \param B
1d019706d866 LLVM10 anatofuz parents: diff changeset	1704 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	1705 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1706 /// \param LB
1d019706d866 LLVM10 anatofuz parents: diff changeset	1707 /// An integer that specifies the length of the string in \a B.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1708 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	1709 /// An 8-bit immediate operand specifying whether the characters are bytes or
1d019706d866 LLVM10 anatofuz parents: diff changeset	1710 /// words, the type of comparison to perform, and the format of the return
1d019706d866 LLVM10 anatofuz parents: diff changeset	1711 /// value. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1712 /// Bits [1:0]: Determine source data format. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1713 /// 00: 16 unsigned bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1714 /// 01: 8 unsigned words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1715 /// 10: 16 signed bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1716 /// 11: 8 signed words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1717 /// Bits [3:2]: Determine comparison type and aggregation method. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1718 /// 00: Subset: Each character in \a B is compared for equality with all
1d019706d866 LLVM10 anatofuz parents: diff changeset	1719 /// the characters in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1720 /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1d019706d866 LLVM10 anatofuz parents: diff changeset	1721 /// basis is greater than or equal for even-indexed elements in \a A,
1d019706d866 LLVM10 anatofuz parents: diff changeset	1722 /// and less than or equal for odd-indexed elements in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1723 /// 10: Match: Compare each pair of corresponding characters in \a A and
1d019706d866 LLVM10 anatofuz parents: diff changeset	1724 /// \a B for equality. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1725 /// 11: Substring: Search \a B for substring matches of \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1726 /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	1727 /// mask of the comparison results. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1728 /// 00: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1729 /// 01: Negate the bit mask. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1730 /// 10: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1731 /// 11: Negate the bit mask only for bits with an index less than or equal
1d019706d866 LLVM10 anatofuz parents: diff changeset	1732 /// to the size of \a A or \a B. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1733 /// Bit [6]: Determines whether the result is zero-extended or expanded to 16
1d019706d866 LLVM10 anatofuz parents: diff changeset	1734 /// bytes. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1735 /// 0: The result is zero-extended to 16 bytes. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1736 /// 1: The result is expanded to 16 bytes (this expansion is performed by
1d019706d866 LLVM10 anatofuz parents: diff changeset	1737 /// repeating each bit 8 or 16 times). \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1738 /// \returns Returns a 128-bit integer vector representing the result mask of
1d019706d866 LLVM10 anatofuz parents: diff changeset	1739 /// the comparison.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1740 #define _mm_cmpestrm(A, LA, B, LB, M) /* Parenthesized so postfix operators apply to the __m128i result, not the raw builtin value. */ \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1741 ((__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1742 (__v16qi)(__m128i)(B), (int)(LB), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1743 (int)(M)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	1744
1d019706d866 LLVM10 anatofuz parents: diff changeset	1745 /// Uses the immediate operand \a M to perform a comparison of string
1d019706d866 LLVM10 anatofuz parents: diff changeset	1746 /// data with explicitly defined lengths that is contained in source operands
1d019706d866 LLVM10 anatofuz parents: diff changeset	1747 /// \a A and \a B. Returns an integer representing the result index of the
1d019706d866 LLVM10 anatofuz parents: diff changeset	1748 /// comparison.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1749 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1750 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1751 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1752 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	1753 /// int _mm_cmpestri(__m128i A, int LA, __m128i B, int LB, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1754 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	1755 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1756 /// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1757 /// instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1758 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1759 /// \param A
1d019706d866 LLVM10 anatofuz parents: diff changeset	1760 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	1761 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1762 /// \param LA
1d019706d866 LLVM10 anatofuz parents: diff changeset	1763 /// An integer that specifies the length of the string in \a A.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1764 /// \param B
1d019706d866 LLVM10 anatofuz parents: diff changeset	1765 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	1766 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1767 /// \param LB
1d019706d866 LLVM10 anatofuz parents: diff changeset	1768 /// An integer that specifies the length of the string in \a B.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1769 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	1770 /// An 8-bit immediate operand specifying whether the characters are bytes or
1d019706d866 LLVM10 anatofuz parents: diff changeset	1771 /// words, the type of comparison to perform, and the format of the return
1d019706d866 LLVM10 anatofuz parents: diff changeset	1772 /// value. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1773 /// Bits [1:0]: Determine source data format. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1774 /// 00: 16 unsigned bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1775 /// 01: 8 unsigned words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1776 /// 10: 16 signed bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1777 /// 11: 8 signed words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1778 /// Bits [3:2]: Determine comparison type and aggregation method. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1779 /// 00: Subset: Each character in \a B is compared for equality with all
1d019706d866 LLVM10 anatofuz parents: diff changeset	1780 /// the characters in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1781 /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1d019706d866 LLVM10 anatofuz parents: diff changeset	1782 /// basis is greater than or equal for even-indexed elements in \a A,
1d019706d866 LLVM10 anatofuz parents: diff changeset	1783 /// and less than or equal for odd-indexed elements in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1784 /// 10: Match: Compare each pair of corresponding characters in \a A and
1d019706d866 LLVM10 anatofuz parents: diff changeset	1785 /// \a B for equality. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1786 /// 11: Substring: Search \a B for substring matches of \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1787 /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	1788 /// mask of the comparison results. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1789 /// 00: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1790 /// 01: Negate the bit mask. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1791 /// 10: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1792 /// 11: Negate the bit mask only for bits with an index less than or equal
1d019706d866 LLVM10 anatofuz parents: diff changeset	1793 /// to the size of \a A or \a B. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1794 /// Bit [6]: Determines whether the index of the lowest set bit or the
1d019706d866 LLVM10 anatofuz parents: diff changeset	1795 /// highest set bit is returned. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1796 /// 0: The index of the least significant set bit. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1797 /// 1: The index of the most significant set bit. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1798 /// \returns Returns an integer representing the result index of the comparison.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1799 #define _mm_cmpestri(A, LA, B, LB, M) /* Parenthesized so the expansion is a single primary expression. */ \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1800 ((int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1801 (__v16qi)(__m128i)(B), (int)(LB), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1802 (int)(M)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	1803
1d019706d866 LLVM10 anatofuz parents: diff changeset	1804 /* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	1805 /// Uses the immediate operand \a M to perform a comparison of string
1d019706d866 LLVM10 anatofuz parents: diff changeset	1806 /// data with implicitly defined lengths that is contained in source operands
1d019706d866 LLVM10 anatofuz parents: diff changeset	1807 /// \a A and \a B. Returns 1 if the bit mask is zero and the length of the
1d019706d866 LLVM10 anatofuz parents: diff changeset	1808 /// string in \a B is the maximum; otherwise, returns 0.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1809 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1810 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1811 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1812 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	1813 /// int _mm_cmpistra(__m128i A, __m128i B, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1814 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	1815 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1816 /// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1817 /// instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1818 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1819 /// \param A
1d019706d866 LLVM10 anatofuz parents: diff changeset	1820 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	1821 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1822 /// \param B
1d019706d866 LLVM10 anatofuz parents: diff changeset	1823 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	1824 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1825 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	1826 /// An 8-bit immediate operand specifying whether the characters are bytes or
1d019706d866 LLVM10 anatofuz parents: diff changeset	1827 /// words and the type of comparison to perform. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1828 /// Bits [1:0]: Determine source data format. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1829 /// 00: 16 unsigned bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1830 /// 01: 8 unsigned words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1831 /// 10: 16 signed bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1832 /// 11: 8 signed words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1833 /// Bits [3:2]: Determine comparison type and aggregation method. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1834 /// 00: Subset: Each character in \a B is compared for equality with all
1d019706d866 LLVM10 anatofuz parents: diff changeset	1835 /// the characters in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1836 /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1d019706d866 LLVM10 anatofuz parents: diff changeset	1837 /// basis is greater than or equal for even-indexed elements in \a A,
1d019706d866 LLVM10 anatofuz parents: diff changeset	1838 /// and less than or equal for odd-indexed elements in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1839 /// 10: Match: Compare each pair of corresponding characters in \a A and
1d019706d866 LLVM10 anatofuz parents: diff changeset	1840 /// \a B for equality. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1841 /// 11: Substring: Search \a B for substring matches of \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1842 /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	1843 /// mask of the comparison results. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1844 /// 00: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1845 /// 01: Negate the bit mask. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1846 /// 10: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1847 /// 11: Negate the bit mask only for bits with an index less than or equal
1d019706d866 LLVM10 anatofuz parents: diff changeset	1848 /// to the size of \a A or \a B. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1849 /// \returns Returns 1 if the bit mask is zero and the length of the string in
1d019706d866 LLVM10 anatofuz parents: diff changeset	1850 /// \a B is the maximum; otherwise, returns 0.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1851 #define _mm_cmpistra(A, B, M) /* Parenthesized so the expansion is a single primary expression. */ \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1852 ((int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1853 (__v16qi)(__m128i)(B), (int)(M)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	1854
1d019706d866 LLVM10 anatofuz parents: diff changeset	1855 /// Uses the immediate operand \a M to perform a comparison of string
1d019706d866 LLVM10 anatofuz parents: diff changeset	1856 /// data with implicitly defined lengths that is contained in source operands
1d019706d866 LLVM10 anatofuz parents: diff changeset	1857 /// \a A and \a B. Returns 1 if the bit mask is non-zero, otherwise, returns
1d019706d866 LLVM10 anatofuz parents: diff changeset	1858 /// 0.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1859 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1860 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1861 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1862 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	1863 /// int _mm_cmpistrc(__m128i A, __m128i B, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1864 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	1865 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1866 /// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1867 /// instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1868 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1869 /// \param A
1d019706d866 LLVM10 anatofuz parents: diff changeset	1870 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	1871 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1872 /// \param B
1d019706d866 LLVM10 anatofuz parents: diff changeset	1873 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	1874 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1875 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	1876 /// An 8-bit immediate operand specifying whether the characters are bytes or
1d019706d866 LLVM10 anatofuz parents: diff changeset	1877 /// words and the type of comparison to perform. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1878 /// Bits [1:0]: Determine source data format. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1879 /// 00: 16 unsigned bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1880 /// 01: 8 unsigned words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1881 /// 10: 16 signed bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1882 /// 11: 8 signed words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1883 /// Bits [3:2]: Determine comparison type and aggregation method. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1884 /// 00: Subset: Each character in \a B is compared for equality with all
1d019706d866 LLVM10 anatofuz parents: diff changeset	1885 /// the characters in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1886 /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1d019706d866 LLVM10 anatofuz parents: diff changeset	1887 /// basis is greater than or equal for even-indexed elements in \a A,
1d019706d866 LLVM10 anatofuz parents: diff changeset	1888 /// and less than or equal for odd-indexed elements in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1889 /// 10: Match: Compare each pair of corresponding characters in \a A and
1d019706d866 LLVM10 anatofuz parents: diff changeset	1890 /// \a B for equality. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1891 /// 11: Substring: Search \a B for substring matches of \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1892 /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	1893 /// mask of the comparison results. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1894 /// 00: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1895 /// 01: Negate the bit mask. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1896 /// 10: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1897 /// 11: Negate the bit mask only for bits with an index less than or equal
1d019706d866 LLVM10 anatofuz parents: diff changeset	1898 /// to the size of \a A or \a B. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1899 /// \returns Returns 1 if the bit mask is non-zero, otherwise, returns 0.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1900 #define _mm_cmpistrc(A, B, M) /* Parenthesized so the expansion is a single primary expression. */ \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1901 ((int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1902 (__v16qi)(__m128i)(B), (int)(M)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	1903
1d019706d866 LLVM10 anatofuz parents: diff changeset	1904 /// Uses the immediate operand \a M to perform a comparison of string
1d019706d866 LLVM10 anatofuz parents: diff changeset	1905 /// data with implicitly defined lengths that is contained in source operands
1d019706d866 LLVM10 anatofuz parents: diff changeset	1906 /// \a A and \a B. Returns bit 0 of the resulting bit mask.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1907 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1908 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1909 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1910 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	1911 /// int _mm_cmpistro(__m128i A, __m128i B, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1912 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	1913 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1914 /// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1915 /// instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1916 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1917 /// \param A
1d019706d866 LLVM10 anatofuz parents: diff changeset	1918 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	1919 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1920 /// \param B
1d019706d866 LLVM10 anatofuz parents: diff changeset	1921 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	1922 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1923 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	1924 /// An 8-bit immediate operand specifying whether the characters are bytes or
1d019706d866 LLVM10 anatofuz parents: diff changeset	1925 /// words and the type of comparison to perform. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1926 /// Bits [1:0]: Determine source data format. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1927 /// 00: 16 unsigned bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1928 /// 01: 8 unsigned words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1929 /// 10: 16 signed bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1930 /// 11: 8 signed words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1931 /// Bits [3:2]: Determine comparison type and aggregation method. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1932 /// 00: Subset: Each character in \a B is compared for equality with all
1d019706d866 LLVM10 anatofuz parents: diff changeset	1933 /// the characters in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1934 /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1d019706d866 LLVM10 anatofuz parents: diff changeset	1935 /// basis is greater than or equal for even-indexed elements in \a A,
1d019706d866 LLVM10 anatofuz parents: diff changeset	1936 /// and less than or equal for odd-indexed elements in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1937 /// 10: Match: Compare each pair of corresponding characters in \a A and
1d019706d866 LLVM10 anatofuz parents: diff changeset	1938 /// \a B for equality. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1939 /// 11: Substring: Search \a B for substring matches of \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1940 /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	1941 /// mask of the comparison results. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1942 /// 00: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1943 /// 01: Negate the bit mask. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1944 /// 10: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1945 /// 11: Negate the bit mask only for bits with an index less than or equal
1d019706d866 LLVM10 anatofuz parents: diff changeset	1946 /// to the size of \a A or \a B. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1947 /// \returns Returns bit 0 of the resulting bit mask.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1948 #define _mm_cmpistro(A, B, M) /* Parenthesized so the expansion is a single primary expression. */ \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1949 ((int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1950 (__v16qi)(__m128i)(B), (int)(M)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	1951
1d019706d866 LLVM10 anatofuz parents: diff changeset	1952 /// Uses the immediate operand \a M to perform a comparison of string
1d019706d866 LLVM10 anatofuz parents: diff changeset	1953 /// data with implicitly defined lengths that is contained in source operands
1d019706d866 LLVM10 anatofuz parents: diff changeset	1954 /// \a A and \a B. Returns 1 if the length of the string in \a A is less than
1d019706d866 LLVM10 anatofuz parents: diff changeset	1955 /// the maximum, otherwise, returns 0.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1956 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1957 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1958 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1959 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	1960 /// int _mm_cmpistrs(__m128i A, __m128i B, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	1961 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	1962 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1963 /// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
1d019706d866 LLVM10 anatofuz parents: diff changeset	1964 /// instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1965 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	1966 /// \param A
1d019706d866 LLVM10 anatofuz parents: diff changeset	1967 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	1968 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1969 /// \param B
1d019706d866 LLVM10 anatofuz parents: diff changeset	1970 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	1971 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1972 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	1973 /// An 8-bit immediate operand specifying whether the characters are bytes or
1d019706d866 LLVM10 anatofuz parents: diff changeset	1974 /// words and the type of comparison to perform. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1975 /// Bits [1:0]: Determine source data format. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1976 /// 00: 16 unsigned bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1977 /// 01: 8 unsigned words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1978 /// 10: 16 signed bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1979 /// 11: 8 signed words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1980 /// Bits [3:2]: Determine comparison type and aggregation method. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1981 /// 00: Subset: Each character in \a B is compared for equality with all
1d019706d866 LLVM10 anatofuz parents: diff changeset	1982 /// the characters in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1983 /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1d019706d866 LLVM10 anatofuz parents: diff changeset	1984 /// basis is greater than or equal for even-indexed elements in \a A,
1d019706d866 LLVM10 anatofuz parents: diff changeset	1985 /// and less than or equal for odd-indexed elements in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1986 /// 10: Match: Compare each pair of corresponding characters in \a A and
1d019706d866 LLVM10 anatofuz parents: diff changeset	1987 /// \a B for equality. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1988 /// 11: Substring: Search \a B for substring matches of \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1989 /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	1990 /// mask of the comparison results. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1991 /// 00: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1992 /// 01: Negate the bit mask. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1993 /// 10: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1994 /// 11: Negate the bit mask only for bits with an index less than or equal
1d019706d866 LLVM10 anatofuz parents: diff changeset	1995 /// to the size of \a A or \a B. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	1996 /// \returns Returns 1 if the length of the string in \a A is less than the
1d019706d866 LLVM10 anatofuz parents: diff changeset	1997 /// maximum, otherwise, returns 0.
1d019706d866 LLVM10 anatofuz parents: diff changeset	1998 #define _mm_cmpistrs(A, B, M) \
1d019706d866 LLVM10 anatofuz parents: diff changeset	1999 ((int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	2000 (__v16qi)(__m128i)(B), (int)(M))) /* outer parens: expansion is one expression */
1d019706d866 LLVM10 anatofuz parents: diff changeset	2001
1d019706d866 LLVM10 anatofuz parents: diff changeset	2002 /// Uses the immediate operand \a M to perform a comparison of string
1d019706d866 LLVM10 anatofuz parents: diff changeset	2003 /// data with implicitly defined lengths that is contained in source operands
1d019706d866 LLVM10 anatofuz parents: diff changeset	2004 /// \a A and \a B. Returns 1 if the length of the string in \a B is less than
1d019706d866 LLVM10 anatofuz parents: diff changeset	2005 /// the maximum, otherwise, returns 0.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2006 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2007 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	2008 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2009 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	2010 /// int _mm_cmpistrz(__m128i A, __m128i B, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	2011 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	2012 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2013 /// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
1d019706d866 LLVM10 anatofuz parents: diff changeset	2014 /// instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2015 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2016 /// \param A
1d019706d866 LLVM10 anatofuz parents: diff changeset	2017 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	2018 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2019 /// \param B
1d019706d866 LLVM10 anatofuz parents: diff changeset	2020 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	2021 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2022 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	2023 /// An 8-bit immediate operand specifying whether the characters are bytes or
1d019706d866 LLVM10 anatofuz parents: diff changeset	2024 /// words and the type of comparison to perform. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2025 /// Bits [1:0]: Determine source data format. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2026 /// 00: 16 unsigned bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2027 /// 01: 8 unsigned words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2028 /// 10: 16 signed bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2029 /// 11: 8 signed words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2030 /// Bits [3:2]: Determine comparison type and aggregation method. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2031 /// 00: Subset: Each character in \a B is compared for equality with all
1d019706d866 LLVM10 anatofuz parents: diff changeset	2032 /// the characters in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2033 /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1d019706d866 LLVM10 anatofuz parents: diff changeset	2034 /// basis is greater than or equal for even-indexed elements in \a A,
1d019706d866 LLVM10 anatofuz parents: diff changeset	2035 /// and less than or equal for odd-indexed elements in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2036 /// 10: Match: Compare each pair of corresponding characters in \a A and
1d019706d866 LLVM10 anatofuz parents: diff changeset	2037 /// \a B for equality. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2038 /// 11: Substring: Search \a B for substring matches of \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2039 /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	2040 /// mask of the comparison results. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2041 /// 00: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2042 /// 01: Negate the bit mask. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2043 /// 10: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2044 /// 11: Negate the bit mask only for bits with an index less than or equal
1d019706d866 LLVM10 anatofuz parents: diff changeset	2045 /// to the size of \a A or \a B.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2046 /// \returns Returns 1 if the length of the string in \a B is less than the
1d019706d866 LLVM10 anatofuz parents: diff changeset	2047 /// maximum, otherwise, returns 0.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2048 #define _mm_cmpistrz(A, B, M) \
1d019706d866 LLVM10 anatofuz parents: diff changeset	2049 ((int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	2050 (__v16qi)(__m128i)(B), (int)(M))) /* outer parens: expansion is one expression */
1d019706d866 LLVM10 anatofuz parents: diff changeset	2051
1d019706d866 LLVM10 anatofuz parents: diff changeset	2052 /// Uses the immediate operand \a M to perform a comparison of string
1d019706d866 LLVM10 anatofuz parents: diff changeset	2053 /// data with explicitly defined lengths that is contained in source operands
1d019706d866 LLVM10 anatofuz parents: diff changeset	2054 /// \a A and \a B. Returns 1 if the bit mask is zero and the length of the
1d019706d866 LLVM10 anatofuz parents: diff changeset	2055 /// string in \a B is the maximum, otherwise, returns 0.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2056 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2057 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	2058 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2059 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	2060 /// int _mm_cmpestra(__m128i A, int LA, __m128i B, int LB, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	2061 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	2062 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2063 /// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
1d019706d866 LLVM10 anatofuz parents: diff changeset	2064 /// instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2065 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2066 /// \param A
1d019706d866 LLVM10 anatofuz parents: diff changeset	2067 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	2068 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2069 /// \param LA
1d019706d866 LLVM10 anatofuz parents: diff changeset	2070 /// An integer that specifies the length of the string in \a A.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2071 /// \param B
1d019706d866 LLVM10 anatofuz parents: diff changeset	2072 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	2073 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2074 /// \param LB
1d019706d866 LLVM10 anatofuz parents: diff changeset	2075 /// An integer that specifies the length of the string in \a B.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2076 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	2077 /// An 8-bit immediate operand specifying whether the characters are bytes or
1d019706d866 LLVM10 anatofuz parents: diff changeset	2078 /// words and the type of comparison to perform. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2079 /// Bits [1:0]: Determine source data format. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2080 /// 00: 16 unsigned bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2081 /// 01: 8 unsigned words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2082 /// 10: 16 signed bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2083 /// 11: 8 signed words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2084 /// Bits [3:2]: Determine comparison type and aggregation method. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2085 /// 00: Subset: Each character in \a B is compared for equality with all
1d019706d866 LLVM10 anatofuz parents: diff changeset	2086 /// the characters in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2087 /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1d019706d866 LLVM10 anatofuz parents: diff changeset	2088 /// basis is greater than or equal for even-indexed elements in \a A,
1d019706d866 LLVM10 anatofuz parents: diff changeset	2089 /// and less than or equal for odd-indexed elements in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2090 /// 10: Match: Compare each pair of corresponding characters in \a A and
1d019706d866 LLVM10 anatofuz parents: diff changeset	2091 /// \a B for equality. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2092 /// 11: Substring: Search \a B for substring matches of \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2093 /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	2094 /// mask of the comparison results. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2095 /// 00: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2096 /// 01: Negate the bit mask. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2097 /// 10: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2098 /// 11: Negate the bit mask only for bits with an index less than or equal
1d019706d866 LLVM10 anatofuz parents: diff changeset	2099 /// to the size of \a A or \a B.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2100 /// \returns Returns 1 if the bit mask is zero and the length of the string in
1d019706d866 LLVM10 anatofuz parents: diff changeset	2101 /// \a B is the maximum, otherwise, returns 0.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2102 #define _mm_cmpestra(A, LA, B, LB, M) \
1d019706d866 LLVM10 anatofuz parents: diff changeset	2103 ((int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	2104 (__v16qi)(__m128i)(B), (int)(LB), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	2105 (int)(M))) /* outer parens: expansion is one expression */
1d019706d866 LLVM10 anatofuz parents: diff changeset	2106
1d019706d866 LLVM10 anatofuz parents: diff changeset	2107 /// Uses the immediate operand \a M to perform a comparison of string
1d019706d866 LLVM10 anatofuz parents: diff changeset	2108 /// data with explicitly defined lengths that is contained in source operands
1d019706d866 LLVM10 anatofuz parents: diff changeset	2109 /// \a A and \a B. Returns 1 if the resulting mask is non-zero, otherwise,
1d019706d866 LLVM10 anatofuz parents: diff changeset	2110 /// returns 0.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2111 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2112 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	2113 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2114 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	2115 /// int _mm_cmpestrc(__m128i A, int LA, __m128i B, int LB, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	2116 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	2117 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2118 /// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
1d019706d866 LLVM10 anatofuz parents: diff changeset	2119 /// instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2120 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2121 /// \param A
1d019706d866 LLVM10 anatofuz parents: diff changeset	2122 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	2123 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2124 /// \param LA
1d019706d866 LLVM10 anatofuz parents: diff changeset	2125 /// An integer that specifies the length of the string in \a A.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2126 /// \param B
1d019706d866 LLVM10 anatofuz parents: diff changeset	2127 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	2128 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2129 /// \param LB
1d019706d866 LLVM10 anatofuz parents: diff changeset	2130 /// An integer that specifies the length of the string in \a B.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2131 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	2132 /// An 8-bit immediate operand specifying whether the characters are bytes or
1d019706d866 LLVM10 anatofuz parents: diff changeset	2133 /// words and the type of comparison to perform. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2134 /// Bits [1:0]: Determine source data format. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2135 /// 00: 16 unsigned bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2136 /// 01: 8 unsigned words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2137 /// 10: 16 signed bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2138 /// 11: 8 signed words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2139 /// Bits [3:2]: Determine comparison type and aggregation method. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2140 /// 00: Subset: Each character in \a B is compared for equality with all
1d019706d866 LLVM10 anatofuz parents: diff changeset	2141 /// the characters in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2142 /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1d019706d866 LLVM10 anatofuz parents: diff changeset	2143 /// basis is greater than or equal for even-indexed elements in \a A,
1d019706d866 LLVM10 anatofuz parents: diff changeset	2144 /// and less than or equal for odd-indexed elements in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2145 /// 10: Match: Compare each pair of corresponding characters in \a A and
1d019706d866 LLVM10 anatofuz parents: diff changeset	2146 /// \a B for equality. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2147 /// 11: Substring: Search \a B for substring matches of \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2148 /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	2149 /// mask of the comparison results. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2150 /// 00: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2151 /// 01: Negate the bit mask. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2152 /// 10: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2153 /// 11: Negate the bit mask only for bits with an index less than or equal
1d019706d866 LLVM10 anatofuz parents: diff changeset	2154 /// to the size of \a A or \a B. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2155 /// \returns Returns 1 if the resulting mask is non-zero, otherwise, returns 0.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2156 #define _mm_cmpestrc(A, LA, B, LB, M) \
1d019706d866 LLVM10 anatofuz parents: diff changeset	2157 ((int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	2158 (__v16qi)(__m128i)(B), (int)(LB), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	2159 (int)(M))) /* outer parens: expansion is one expression */
1d019706d866 LLVM10 anatofuz parents: diff changeset	2160
1d019706d866 LLVM10 anatofuz parents: diff changeset	2161 /// Uses the immediate operand \a M to perform a comparison of string
1d019706d866 LLVM10 anatofuz parents: diff changeset	2162 /// data with explicitly defined lengths that is contained in source operands
1d019706d866 LLVM10 anatofuz parents: diff changeset	2163 /// \a A and \a B. Returns bit 0 of the resulting bit mask.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2164 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2165 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	2166 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2167 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	2168 /// int _mm_cmpestro(__m128i A, int LA, __m128i B, int LB, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	2169 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	2170 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2171 /// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
1d019706d866 LLVM10 anatofuz parents: diff changeset	2172 /// instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2173 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2174 /// \param A
1d019706d866 LLVM10 anatofuz parents: diff changeset	2175 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	2176 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2177 /// \param LA
1d019706d866 LLVM10 anatofuz parents: diff changeset	2178 /// An integer that specifies the length of the string in \a A.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2179 /// \param B
1d019706d866 LLVM10 anatofuz parents: diff changeset	2180 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	2181 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2182 /// \param LB
1d019706d866 LLVM10 anatofuz parents: diff changeset	2183 /// An integer that specifies the length of the string in \a B.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2184 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	2185 /// An 8-bit immediate operand specifying whether the characters are bytes or
1d019706d866 LLVM10 anatofuz parents: diff changeset	2186 /// words and the type of comparison to perform. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2187 /// Bits [1:0]: Determine source data format. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2188 /// 00: 16 unsigned bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2189 /// 01: 8 unsigned words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2190 /// 10: 16 signed bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2191 /// 11: 8 signed words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2192 /// Bits [3:2]: Determine comparison type and aggregation method. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2193 /// 00: Subset: Each character in \a B is compared for equality with all
1d019706d866 LLVM10 anatofuz parents: diff changeset	2194 /// the characters in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2195 /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1d019706d866 LLVM10 anatofuz parents: diff changeset	2196 /// basis is greater than or equal for even-indexed elements in \a A,
1d019706d866 LLVM10 anatofuz parents: diff changeset	2197 /// and less than or equal for odd-indexed elements in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2198 /// 10: Match: Compare each pair of corresponding characters in \a A and
1d019706d866 LLVM10 anatofuz parents: diff changeset	2199 /// \a B for equality. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2200 /// 11: Substring: Search \a B for substring matches of \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2201 /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	2202 /// mask of the comparison results. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2203 /// 00: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2204 /// 01: Negate the bit mask. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2205 /// 10: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2206 /// 11: Negate the bit mask only for bits with an index less than or equal
1d019706d866 LLVM10 anatofuz parents: diff changeset	2207 /// to the size of \a A or \a B.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2208 /// \returns Returns bit 0 of the resulting bit mask.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2209 #define _mm_cmpestro(A, LA, B, LB, M) \
1d019706d866 LLVM10 anatofuz parents: diff changeset	2210 ((int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	2211 (__v16qi)(__m128i)(B), (int)(LB), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	2212 (int)(M))) /* outer parens: expansion is one expression */
1d019706d866 LLVM10 anatofuz parents: diff changeset	2213
1d019706d866 LLVM10 anatofuz parents: diff changeset	2214 /// Uses the immediate operand \a M to perform a comparison of string
1d019706d866 LLVM10 anatofuz parents: diff changeset	2215 /// data with explicitly defined lengths that is contained in source operands
1d019706d866 LLVM10 anatofuz parents: diff changeset	2216 /// \a A and \a B. Returns 1 if the length of the string in \a A is less than
1d019706d866 LLVM10 anatofuz parents: diff changeset	2217 /// the maximum, otherwise, returns 0.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2218 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2219 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	2220 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2221 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	2222 /// int _mm_cmpestrs(__m128i A, int LA, __m128i B, int LB, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	2223 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	2224 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2225 /// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
1d019706d866 LLVM10 anatofuz parents: diff changeset	2226 /// instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2227 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2228 /// \param A
1d019706d866 LLVM10 anatofuz parents: diff changeset	2229 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	2230 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2231 /// \param LA
1d019706d866 LLVM10 anatofuz parents: diff changeset	2232 /// An integer that specifies the length of the string in \a A.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2233 /// \param B
1d019706d866 LLVM10 anatofuz parents: diff changeset	2234 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	2235 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2236 /// \param LB
1d019706d866 LLVM10 anatofuz parents: diff changeset	2237 /// An integer that specifies the length of the string in \a B.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2238 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	2239 /// An 8-bit immediate operand specifying whether the characters are bytes or
1d019706d866 LLVM10 anatofuz parents: diff changeset	2240 /// words and the type of comparison to perform. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2241 /// Bits [1:0]: Determine source data format. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2242 /// 00: 16 unsigned bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2243 /// 01: 8 unsigned words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2244 /// 10: 16 signed bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2245 /// 11: 8 signed words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2246 /// Bits [3:2]: Determine comparison type and aggregation method. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2247 /// 00: Subset: Each character in \a B is compared for equality with all
1d019706d866 LLVM10 anatofuz parents: diff changeset	2248 /// the characters in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2249 /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1d019706d866 LLVM10 anatofuz parents: diff changeset	2250 /// basis is greater than or equal for even-indexed elements in \a A,
1d019706d866 LLVM10 anatofuz parents: diff changeset	2251 /// and less than or equal for odd-indexed elements in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2252 /// 10: Match: Compare each pair of corresponding characters in \a A and
1d019706d866 LLVM10 anatofuz parents: diff changeset	2253 /// \a B for equality. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2254 /// 11: Substring: Search \a B for substring matches of \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2255 /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	2256 /// mask of the comparison results. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2257 /// 00: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2258 /// 01: Negate the bit mask. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2259 /// 10: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2260 /// 11: Negate the bit mask only for bits with an index less than or equal
1d019706d866 LLVM10 anatofuz parents: diff changeset	2261 /// to the size of \a A or \a B. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2262 /// \returns Returns 1 if the length of the string in \a A is less than the
1d019706d866 LLVM10 anatofuz parents: diff changeset	2263 /// maximum, otherwise, returns 0.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2264 #define _mm_cmpestrs(A, LA, B, LB, M) \
1d019706d866 LLVM10 anatofuz parents: diff changeset	2265 ((int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	2266 (__v16qi)(__m128i)(B), (int)(LB), \
1d019706d866 LLVM10 anatofuz parents: diff changeset	2267 (int)(M))) /* outer parens: expansion is one expression */
1d019706d866 LLVM10 anatofuz parents: diff changeset	2268
1d019706d866 LLVM10 anatofuz parents: diff changeset	2269 /// Uses the immediate operand \a M to perform a comparison of string
1d019706d866 LLVM10 anatofuz parents: diff changeset	2270 /// data with explicitly defined lengths that is contained in source operands
1d019706d866 LLVM10 anatofuz parents: diff changeset	2271 /// \a A and \a B. Returns 1 if the length of the string in \a B is less than
1d019706d866 LLVM10 anatofuz parents: diff changeset	2272 /// the maximum, otherwise, returns 0.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2273 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2274 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	2275 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2276 /// \code
1d019706d866 LLVM10 anatofuz parents: diff changeset	2277 /// int _mm_cmpestrz(__m128i A, int LA, __m128i B, int LB, const int M);
1d019706d866 LLVM10 anatofuz parents: diff changeset	2278 /// \endcode
1d019706d866 LLVM10 anatofuz parents: diff changeset	2279 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2280 /// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2281 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2282 /// \param A
1d019706d866 LLVM10 anatofuz parents: diff changeset	2283 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	2284 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2285 /// \param LA
1d019706d866 LLVM10 anatofuz parents: diff changeset	2286 /// An integer that specifies the length of the string in \a A.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2287 /// \param B
1d019706d866 LLVM10 anatofuz parents: diff changeset	2288 /// A 128-bit integer vector containing one of the source operands to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	2289 /// compared.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2290 /// \param LB
1d019706d866 LLVM10 anatofuz parents: diff changeset	2291 /// An integer that specifies the length of the string in \a B.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2292 /// \param M
1d019706d866 LLVM10 anatofuz parents: diff changeset	2293 /// An 8-bit immediate operand specifying whether the characters are bytes or
1d019706d866 LLVM10 anatofuz parents: diff changeset	2294 /// words and the type of comparison to perform. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2295 /// Bits [1:0]: Determine source data format. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2296 /// 00: 16 unsigned bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2297 /// 01: 8 unsigned words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2298 /// 10: 16 signed bytes \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2299 /// 11: 8 signed words \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2300 /// Bits [3:2]: Determine comparison type and aggregation method. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2301 /// 00: Subset: Each character in \a B is compared for equality with all
1d019706d866 LLVM10 anatofuz parents: diff changeset	2302 /// the characters in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2303 /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1d019706d866 LLVM10 anatofuz parents: diff changeset	2304 /// basis is greater than or equal for even-indexed elements in \a A,
1d019706d866 LLVM10 anatofuz parents: diff changeset	2305 /// and less than or equal for odd-indexed elements in \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2306 /// 10: Match: Compare each pair of corresponding characters in \a A and
1d019706d866 LLVM10 anatofuz parents: diff changeset	2307 /// \a B for equality. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2308 /// 11: Substring: Search \a B for substring matches of \a A. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2309 /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	2310 /// mask of the comparison results. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2311 /// 00: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2312 /// 01: Negate the bit mask. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2313 /// 10: No effect. \n
1d019706d866 LLVM10 anatofuz parents: diff changeset	2314 /// 11: Negate the bit mask only for bits with an index less than or equal
1d019706d866 LLVM10 anatofuz parents: diff changeset	2315 /// to the size of \a A or \a B.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2316 /// \returns Returns 1 if the length of the string in \a B is less than the
1d019706d866 LLVM10 anatofuz parents: diff changeset	2317 /// maximum, otherwise, returns 0.
/* The whole expansion is parenthesized so that the leading (int) cast can
 * never be separated from the builtin call by the expression the macro is
 * used in (for example as the operand of sizeof). Every macro argument is
 * individually parenthesized and cast before being handed to the builtin. */
#define _mm_cmpestrz(A, LA, B, LB, M) \
  ((int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \
                                     (__v16qi)(__m128i)(B), (int)(LB), \
                                     (int)(M)))
1d019706d866 LLVM10 anatofuz parents: diff changeset	2322
1d019706d866 LLVM10 anatofuz parents: diff changeset	2323 /* SSE4.2 Compare Packed Data -- Greater Than. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	2324 /// Compares each of the corresponding 64-bit values of the 128-bit
1d019706d866 LLVM10 anatofuz parents: diff changeset	2325 /// integer vectors to determine if the values in the first operand are
1d019706d866 LLVM10 anatofuz parents: diff changeset	2326 /// greater than those in the second operand.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2327 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2328 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	2329 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2330 /// This intrinsic corresponds to the <c> VPCMPGTQ / PCMPGTQ </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2331 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2332 /// \param __V1
1d019706d866 LLVM10 anatofuz parents: diff changeset	2333 /// A 128-bit integer vector.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2334 /// \param __V2
1d019706d866 LLVM10 anatofuz parents: diff changeset	2335 /// A 128-bit integer vector.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2336 /// \returns A 128-bit integer vector containing the comparison results.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2337 static __inline__ __m128i __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	2338 _mm_cmpgt_epi64(__m128i __V1, __m128i __V2)
1d019706d866 LLVM10 anatofuz parents: diff changeset	2339 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	2340 return (__m128i)((__v2di)__V1 > (__v2di)__V2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	2341 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	2342
1d019706d866 LLVM10 anatofuz parents: diff changeset	2343 /* SSE4.2 Accumulate CRC32. */
1d019706d866 LLVM10 anatofuz parents: diff changeset	2344 /// Adds the unsigned integer operand to the CRC-32C checksum of the
1d019706d866 LLVM10 anatofuz parents: diff changeset	2345 /// unsigned char operand.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2346 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2347 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	2348 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2349 /// This intrinsic corresponds to the <c> CRC32B </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2350 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2351 /// \param __C
1d019706d866 LLVM10 anatofuz parents: diff changeset	2352 /// An unsigned integer operand to add to the CRC-32C checksum of operand
1d019706d866 LLVM10 anatofuz parents: diff changeset	2353 /// \a __D.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2354 /// \param __D
1d019706d866 LLVM10 anatofuz parents: diff changeset	2355 /// An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2356 /// \returns The result of adding operand \a __C to the CRC-32C checksum of
1d019706d866 LLVM10 anatofuz parents: diff changeset	2357 /// operand \a __D.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2358 static __inline__ unsigned int __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	2359 _mm_crc32_u8(unsigned int __C, unsigned char __D)
1d019706d866 LLVM10 anatofuz parents: diff changeset	2360 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	2361 return __builtin_ia32_crc32qi(__C, __D);
1d019706d866 LLVM10 anatofuz parents: diff changeset	2362 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	2363
1d019706d866 LLVM10 anatofuz parents: diff changeset	2364 /// Adds the unsigned integer operand to the CRC-32C checksum of the
1d019706d866 LLVM10 anatofuz parents: diff changeset	2365 /// unsigned short operand.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2366 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2367 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	2368 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2369 /// This intrinsic corresponds to the <c> CRC32W </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2370 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2371 /// \param __C
1d019706d866 LLVM10 anatofuz parents: diff changeset	2372 /// An unsigned integer operand to add to the CRC-32C checksum of operand
1d019706d866 LLVM10 anatofuz parents: diff changeset	2373 /// \a __D.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2374 /// \param __D
1d019706d866 LLVM10 anatofuz parents: diff changeset	2375 /// An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2376 /// \returns The result of adding operand \a __C to the CRC-32C checksum of
1d019706d866 LLVM10 anatofuz parents: diff changeset	2377 /// operand \a __D.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2378 static __inline__ unsigned int __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	2379 _mm_crc32_u16(unsigned int __C, unsigned short __D)
1d019706d866 LLVM10 anatofuz parents: diff changeset	2380 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	2381 return __builtin_ia32_crc32hi(__C, __D);
1d019706d866 LLVM10 anatofuz parents: diff changeset	2382 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	2383
1d019706d866 LLVM10 anatofuz parents: diff changeset	2384 /// Adds the first unsigned integer operand to the CRC-32C checksum of
1d019706d866 LLVM10 anatofuz parents: diff changeset	2385 /// the second unsigned integer operand.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2386 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2387 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	2388 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2389 /// This intrinsic corresponds to the <c> CRC32L </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2390 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2391 /// \param __C
1d019706d866 LLVM10 anatofuz parents: diff changeset	2392 /// An unsigned integer operand to add to the CRC-32C checksum of operand
1d019706d866 LLVM10 anatofuz parents: diff changeset	2393 /// \a __D.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2394 /// \param __D
1d019706d866 LLVM10 anatofuz parents: diff changeset	2395 /// An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2396 /// \returns The result of adding operand \a __C to the CRC-32C checksum of
1d019706d866 LLVM10 anatofuz parents: diff changeset	2397 /// operand \a __D.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2398 static __inline__ unsigned int __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	2399 _mm_crc32_u32(unsigned int __C, unsigned int __D)
1d019706d866 LLVM10 anatofuz parents: diff changeset	2400 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	2401 return __builtin_ia32_crc32si(__C, __D);
1d019706d866 LLVM10 anatofuz parents: diff changeset	2402 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	2403
1d019706d866 LLVM10 anatofuz parents: diff changeset	2404 #ifdef __x86_64__
1d019706d866 LLVM10 anatofuz parents: diff changeset	2405 /// Adds the unsigned integer operand to the CRC-32C checksum of the
1d019706d866 LLVM10 anatofuz parents: diff changeset	2406 /// unsigned 64-bit integer operand.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2407 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2408 /// \headerfile <x86intrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	2409 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2410 /// This intrinsic corresponds to the <c> CRC32Q </c> instruction.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2411 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	2412 /// \param __C
1d019706d866 LLVM10 anatofuz parents: diff changeset	2413 /// An unsigned integer operand to add to the CRC-32C checksum of operand
1d019706d866 LLVM10 anatofuz parents: diff changeset	2414 /// \a __D.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2415 /// \param __D
1d019706d866 LLVM10 anatofuz parents: diff changeset	2416 /// An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2417 /// \returns The result of adding operand \a __C to the CRC-32C checksum of
1d019706d866 LLVM10 anatofuz parents: diff changeset	2418 /// operand \a __D.
1d019706d866 LLVM10 anatofuz parents: diff changeset	2419 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	2420 _mm_crc32_u64(unsigned long long __C, unsigned long long __D)
1d019706d866 LLVM10 anatofuz parents: diff changeset	2421 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	2422 return __builtin_ia32_crc32di(__C, __D);
1d019706d866 LLVM10 anatofuz parents: diff changeset	2423 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	2424 #endif /* __x86_64__ */
1d019706d866 LLVM10 anatofuz parents: diff changeset	2425
1d019706d866 LLVM10 anatofuz parents: diff changeset	2426 #undef __DEFAULT_FN_ATTRS
1d019706d866 LLVM10 anatofuz parents: diff changeset	2427
1d019706d866 LLVM10 anatofuz parents: diff changeset	2428 #include <popcntintrin.h>
1d019706d866 LLVM10 anatofuz parents: diff changeset	2429
1d019706d866 LLVM10 anatofuz parents: diff changeset	2430 #endif /* __SMMINTRIN_H */

Mercurial > hg > CbC > CbC_llvm

annotate clang/lib/Headers/smmintrin.h @ 176:de4ac79aef9d