150
|
1 /*===---- xopintrin.h - XOP intrinsics -------------------------------------===
|
|
2 *
|
|
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 * See https://llvm.org/LICENSE.txt for license information.
|
|
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 *
|
|
7 *===-----------------------------------------------------------------------===
|
|
8 */
|
|
9
|
|
10 #ifndef __X86INTRIN_H
|
|
11 #error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
|
|
12 #endif
|
|
13
|
|
14 #ifndef __XOPINTRIN_H
|
|
15 #define __XOPINTRIN_H
|
|
16
|
|
17 #include <fma4intrin.h>
|
|
18
|
|
/* Default attribute sets applied to every function defined in this file.
   The two variants differ only in the __min_vector_width__ argument
   (128 vs. 256). */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("xop"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, __target__("xop"), \
                 __min_vector_width__(256)))
|
|
22
|
|
23 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
24 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
|
|
25 {
|
|
26 return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
|
|
27 }
|
|
28
|
|
29 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
30 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
|
|
31 {
|
|
32 return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
|
|
33 }
|
|
34
|
|
35 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
36 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
|
|
37 {
|
|
38 return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
|
|
39 }
|
|
40
|
|
41 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
42 _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
|
|
43 {
|
|
44 return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
|
|
45 }
|
|
46
|
|
47 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
48 _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
|
|
49 {
|
|
50 return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
|
|
51 }
|
|
52
|
|
53 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
54 _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
|
|
55 {
|
|
56 return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
|
|
57 }
|
|
58
|
|
59 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
60 _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
|
|
61 {
|
|
62 return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
|
|
63 }
|
|
64
|
|
65 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
66 _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
|
|
67 {
|
|
68 return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
|
|
69 }
|
|
70
|
|
71 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
72 _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
|
|
73 {
|
|
74 return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
|
|
75 }
|
|
76
|
|
77 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
78 _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
|
|
79 {
|
|
80 return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
|
|
81 }
|
|
82
|
|
83 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
84 _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
|
|
85 {
|
|
86 return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
|
|
87 }
|
|
88
|
|
89 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
90 _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
|
|
91 {
|
|
92 return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
|
|
93 }
|
|
94
|
|
95 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
96 _mm_haddw_epi8(__m128i __A)
|
|
97 {
|
|
98 return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
|
|
99 }
|
|
100
|
|
101 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
102 _mm_haddd_epi8(__m128i __A)
|
|
103 {
|
|
104 return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
|
|
105 }
|
|
106
|
|
107 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
108 _mm_haddq_epi8(__m128i __A)
|
|
109 {
|
|
110 return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
|
|
111 }
|
|
112
|
|
113 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
114 _mm_haddd_epi16(__m128i __A)
|
|
115 {
|
|
116 return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
|
|
117 }
|
|
118
|
|
119 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
120 _mm_haddq_epi16(__m128i __A)
|
|
121 {
|
|
122 return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
|
|
123 }
|
|
124
|
|
125 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
126 _mm_haddq_epi32(__m128i __A)
|
|
127 {
|
|
128 return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
|
|
129 }
|
|
130
|
|
131 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
132 _mm_haddw_epu8(__m128i __A)
|
|
133 {
|
|
134 return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
|
|
135 }
|
|
136
|
|
137 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
138 _mm_haddd_epu8(__m128i __A)
|
|
139 {
|
|
140 return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
|
|
141 }
|
|
142
|
|
143 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
144 _mm_haddq_epu8(__m128i __A)
|
|
145 {
|
|
146 return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
|
|
147 }
|
|
148
|
|
149 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
150 _mm_haddd_epu16(__m128i __A)
|
|
151 {
|
|
152 return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
|
|
153 }
|
|
154
|
|
155 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
156 _mm_haddq_epu16(__m128i __A)
|
|
157 {
|
|
158 return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
|
|
159 }
|
|
160
|
|
161 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
162 _mm_haddq_epu32(__m128i __A)
|
|
163 {
|
|
164 return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
|
|
165 }
|
|
166
|
|
167 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
168 _mm_hsubw_epi8(__m128i __A)
|
|
169 {
|
|
170 return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
|
|
171 }
|
|
172
|
|
173 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
174 _mm_hsubd_epi16(__m128i __A)
|
|
175 {
|
|
176 return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
|
|
177 }
|
|
178
|
|
179 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
180 _mm_hsubq_epi32(__m128i __A)
|
|
181 {
|
|
182 return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
|
|
183 }
|
|
184
|
|
185 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
186 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
|
|
187 {
|
|
188 return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C));
|
|
189 }
|
|
190
|
|
191 static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
|
192 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
|
|
193 {
|
|
194 return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C));
|
|
195 }
|
|
196
|
|
197 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
198 _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
|
|
199 {
|
|
200 return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
|
|
201 }
|
|
202
|
|
203 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
204 _mm_rot_epi8(__m128i __A, __m128i __B)
|
|
205 {
|
|
206 return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
|
|
207 }
|
|
208
|
|
209 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
210 _mm_rot_epi16(__m128i __A, __m128i __B)
|
|
211 {
|
|
212 return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
|
|
213 }
|
|
214
|
|
215 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
216 _mm_rot_epi32(__m128i __A, __m128i __B)
|
|
217 {
|
|
218 return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
|
|
219 }
|
|
220
|
|
221 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
222 _mm_rot_epi64(__m128i __A, __m128i __B)
|
|
223 {
|
|
224 return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
|
|
225 }
|
|
226
|
|
/* Expands to __builtin_ia32_vprotbi applied to A (viewed as __v16qi) and N,
   with the result cast to __m128i. The whole expansion is parenthesized so
   that a postfix operator written after the macro applies to the cast
   result rather than to the builtin call. */
#define _mm_roti_epi8(A, N) \
  ((__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)))
|
|
229
|
|
/* Expands to __builtin_ia32_vprotwi applied to A (viewed as __v8hi) and N,
   with the result cast to __m128i. The whole expansion is parenthesized so
   that a postfix operator written after the macro applies to the cast
   result rather than to the builtin call. */
#define _mm_roti_epi16(A, N) \
  ((__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)))
|
|
232
|
|
/* Expands to __builtin_ia32_vprotdi applied to A (viewed as __v4si) and N,
   with the result cast to __m128i. The whole expansion is parenthesized so
   that a postfix operator written after the macro applies to the cast
   result rather than to the builtin call. */
#define _mm_roti_epi32(A, N) \
  ((__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)))
|
|
235
|
|
/* Expands to __builtin_ia32_vprotqi applied to A (viewed as __v2di) and N,
   with the result cast to __m128i. The whole expansion is parenthesized so
   that a postfix operator written after the macro applies to the cast
   result rather than to the builtin call. */
#define _mm_roti_epi64(A, N) \
  ((__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)))
|
|
238
|
|
239 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
240 _mm_shl_epi8(__m128i __A, __m128i __B)
|
|
241 {
|
|
242 return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
|
|
243 }
|
|
244
|
|
245 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
246 _mm_shl_epi16(__m128i __A, __m128i __B)
|
|
247 {
|
|
248 return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
|
|
249 }
|
|
250
|
|
251 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
252 _mm_shl_epi32(__m128i __A, __m128i __B)
|
|
253 {
|
|
254 return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
|
|
255 }
|
|
256
|
|
257 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
258 _mm_shl_epi64(__m128i __A, __m128i __B)
|
|
259 {
|
|
260 return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
|
|
261 }
|
|
262
|
|
263 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
264 _mm_sha_epi8(__m128i __A, __m128i __B)
|
|
265 {
|
|
266 return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
|
|
267 }
|
|
268
|
|
269 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
270 _mm_sha_epi16(__m128i __A, __m128i __B)
|
|
271 {
|
|
272 return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
|
|
273 }
|
|
274
|
|
275 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
276 _mm_sha_epi32(__m128i __A, __m128i __B)
|
|
277 {
|
|
278 return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
|
|
279 }
|
|
280
|
|
281 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
282 _mm_sha_epi64(__m128i __A, __m128i __B)
|
|
283 {
|
|
284 return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
|
|
285 }
|
|
286
|
|
/* Expands to __builtin_ia32_vpcomub applied to A and B (viewed as __v16qi)
   and the selector N, cast to __m128i. The whole expansion is parenthesized
   so the leading cast cannot be separated from the call by a neighbouring
   postfix operator. */
#define _mm_com_epu8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \
                                   (__v16qi)(__m128i)(B), (N)))
|
|
290
|
|
/* Expands to __builtin_ia32_vpcomuw applied to A and B (viewed as __v8hi)
   and the selector N, cast to __m128i. The whole expansion is parenthesized
   so the leading cast cannot be separated from the call by a neighbouring
   postfix operator. */
#define _mm_com_epu16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \
                                   (__v8hi)(__m128i)(B), (N)))
|
|
294
|
|
/* Expands to __builtin_ia32_vpcomud applied to A and B (viewed as __v4si)
   and the selector N, cast to __m128i. The whole expansion is parenthesized
   so the leading cast cannot be separated from the call by a neighbouring
   postfix operator. */
#define _mm_com_epu32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \
                                   (__v4si)(__m128i)(B), (N)))
|
|
298
|
|
/* Expands to __builtin_ia32_vpcomuq applied to A and B (viewed as __v2di)
   and the selector N, cast to __m128i. The whole expansion is parenthesized
   so the leading cast cannot be separated from the call by a neighbouring
   postfix operator. */
#define _mm_com_epu64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \
                                   (__v2di)(__m128i)(B), (N)))
|
|
302
|
|
/* Expands to __builtin_ia32_vpcomb applied to A and B (viewed as __v16qi)
   and the selector N, cast to __m128i. The whole expansion is parenthesized
   so the leading cast cannot be separated from the call by a neighbouring
   postfix operator. */
#define _mm_com_epi8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \
                                  (__v16qi)(__m128i)(B), (N)))
|
|
306
|
|
/* Expands to __builtin_ia32_vpcomw applied to A and B (viewed as __v8hi)
   and the selector N, cast to __m128i. The whole expansion is parenthesized
   so the leading cast cannot be separated from the call by a neighbouring
   postfix operator. */
#define _mm_com_epi16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \
                                  (__v8hi)(__m128i)(B), (N)))
|
|
310
|
|
/* Expands to __builtin_ia32_vpcomd applied to A and B (viewed as __v4si)
   and the selector N, cast to __m128i. The whole expansion is parenthesized
   so the leading cast cannot be separated from the call by a neighbouring
   postfix operator. */
#define _mm_com_epi32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \
                                  (__v4si)(__m128i)(B), (N)))
|
|
314
|
|
/* Expands to __builtin_ia32_vpcomq applied to A and B (viewed as __v2di)
   and the selector N, cast to __m128i. The whole expansion is parenthesized
   so the leading cast cannot be separated from the call by a neighbouring
   postfix operator. */
#define _mm_com_epi64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \
                                  (__v2di)(__m128i)(B), (N)))
|
|
318
|
|
/* Selector values passed as the last argument of the _mm_com_* macros.
   Each one is used by the correspondingly named _mm_com<op>_* wrappers
   below (e.g. _MM_PCOMCTRL_LT by _mm_comlt_*). */
#define _MM_PCOMCTRL_LT    0
#define _MM_PCOMCTRL_LE    1
#define _MM_PCOMCTRL_GT    2
#define _MM_PCOMCTRL_GE    3
#define _MM_PCOMCTRL_EQ    4
#define _MM_PCOMCTRL_NEQ   5
#define _MM_PCOMCTRL_FALSE 6
#define _MM_PCOMCTRL_TRUE  7
|
|
327
|
|
328 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
329 _mm_comlt_epu8(__m128i __A, __m128i __B)
|
|
330 {
|
|
331 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);
|
|
332 }
|
|
333
|
|
334 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
335 _mm_comle_epu8(__m128i __A, __m128i __B)
|
|
336 {
|
|
337 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);
|
|
338 }
|
|
339
|
|
340 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
341 _mm_comgt_epu8(__m128i __A, __m128i __B)
|
|
342 {
|
|
343 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);
|
|
344 }
|
|
345
|
|
346 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
347 _mm_comge_epu8(__m128i __A, __m128i __B)
|
|
348 {
|
|
349 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);
|
|
350 }
|
|
351
|
|
352 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
353 _mm_comeq_epu8(__m128i __A, __m128i __B)
|
|
354 {
|
|
355 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);
|
|
356 }
|
|
357
|
|
358 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
359 _mm_comneq_epu8(__m128i __A, __m128i __B)
|
|
360 {
|
|
361 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);
|
|
362 }
|
|
363
|
|
364 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
365 _mm_comfalse_epu8(__m128i __A, __m128i __B)
|
|
366 {
|
|
367 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);
|
|
368 }
|
|
369
|
|
370 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
371 _mm_comtrue_epu8(__m128i __A, __m128i __B)
|
|
372 {
|
|
373 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);
|
|
374 }
|
|
375
|
|
376 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
377 _mm_comlt_epu16(__m128i __A, __m128i __B)
|
|
378 {
|
|
379 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);
|
|
380 }
|
|
381
|
|
382 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
383 _mm_comle_epu16(__m128i __A, __m128i __B)
|
|
384 {
|
|
385 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);
|
|
386 }
|
|
387
|
|
388 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
389 _mm_comgt_epu16(__m128i __A, __m128i __B)
|
|
390 {
|
|
391 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);
|
|
392 }
|
|
393
|
|
394 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
395 _mm_comge_epu16(__m128i __A, __m128i __B)
|
|
396 {
|
|
397 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);
|
|
398 }
|
|
399
|
|
400 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
401 _mm_comeq_epu16(__m128i __A, __m128i __B)
|
|
402 {
|
|
403 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);
|
|
404 }
|
|
405
|
|
406 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
407 _mm_comneq_epu16(__m128i __A, __m128i __B)
|
|
408 {
|
|
409 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);
|
|
410 }
|
|
411
|
|
412 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
413 _mm_comfalse_epu16(__m128i __A, __m128i __B)
|
|
414 {
|
|
415 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);
|
|
416 }
|
|
417
|
|
418 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
419 _mm_comtrue_epu16(__m128i __A, __m128i __B)
|
|
420 {
|
|
421 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);
|
|
422 }
|
|
423
|
|
424 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
425 _mm_comlt_epu32(__m128i __A, __m128i __B)
|
|
426 {
|
|
427 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);
|
|
428 }
|
|
429
|
|
430 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
431 _mm_comle_epu32(__m128i __A, __m128i __B)
|
|
432 {
|
|
433 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);
|
|
434 }
|
|
435
|
|
436 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
437 _mm_comgt_epu32(__m128i __A, __m128i __B)
|
|
438 {
|
|
439 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);
|
|
440 }
|
|
441
|
|
442 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
443 _mm_comge_epu32(__m128i __A, __m128i __B)
|
|
444 {
|
|
445 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);
|
|
446 }
|
|
447
|
|
448 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
449 _mm_comeq_epu32(__m128i __A, __m128i __B)
|
|
450 {
|
|
451 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);
|
|
452 }
|
|
453
|
|
454 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
455 _mm_comneq_epu32(__m128i __A, __m128i __B)
|
|
456 {
|
|
457 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);
|
|
458 }
|
|
459
|
|
460 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
461 _mm_comfalse_epu32(__m128i __A, __m128i __B)
|
|
462 {
|
|
463 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);
|
|
464 }
|
|
465
|
|
466 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
467 _mm_comtrue_epu32(__m128i __A, __m128i __B)
|
|
468 {
|
|
469 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);
|
|
470 }
|
|
471
|
|
472 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
473 _mm_comlt_epu64(__m128i __A, __m128i __B)
|
|
474 {
|
|
475 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);
|
|
476 }
|
|
477
|
|
478 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
479 _mm_comle_epu64(__m128i __A, __m128i __B)
|
|
480 {
|
|
481 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);
|
|
482 }
|
|
483
|
|
484 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
485 _mm_comgt_epu64(__m128i __A, __m128i __B)
|
|
486 {
|
|
487 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);
|
|
488 }
|
|
489
|
|
490 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
491 _mm_comge_epu64(__m128i __A, __m128i __B)
|
|
492 {
|
|
493 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);
|
|
494 }
|
|
495
|
|
496 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
497 _mm_comeq_epu64(__m128i __A, __m128i __B)
|
|
498 {
|
|
499 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);
|
|
500 }
|
|
501
|
|
502 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
503 _mm_comneq_epu64(__m128i __A, __m128i __B)
|
|
504 {
|
|
505 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);
|
|
506 }
|
|
507
|
|
508 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
509 _mm_comfalse_epu64(__m128i __A, __m128i __B)
|
|
510 {
|
|
511 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);
|
|
512 }
|
|
513
|
|
514 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
515 _mm_comtrue_epu64(__m128i __A, __m128i __B)
|
|
516 {
|
|
517 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);
|
|
518 }
|
|
519
|
|
520 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
521 _mm_comlt_epi8(__m128i __A, __m128i __B)
|
|
522 {
|
|
523 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);
|
|
524 }
|
|
525
|
|
526 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
527 _mm_comle_epi8(__m128i __A, __m128i __B)
|
|
528 {
|
|
529 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);
|
|
530 }
|
|
531
|
|
532 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
533 _mm_comgt_epi8(__m128i __A, __m128i __B)
|
|
534 {
|
|
535 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);
|
|
536 }
|
|
537
|
|
538 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
539 _mm_comge_epi8(__m128i __A, __m128i __B)
|
|
540 {
|
|
541 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);
|
|
542 }
|
|
543
|
|
544 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
545 _mm_comeq_epi8(__m128i __A, __m128i __B)
|
|
546 {
|
|
547 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);
|
|
548 }
|
|
549
|
|
550 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
551 _mm_comneq_epi8(__m128i __A, __m128i __B)
|
|
552 {
|
|
553 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);
|
|
554 }
|
|
555
|
|
556 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
557 _mm_comfalse_epi8(__m128i __A, __m128i __B)
|
|
558 {
|
|
559 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);
|
|
560 }
|
|
561
|
|
562 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
563 _mm_comtrue_epi8(__m128i __A, __m128i __B)
|
|
564 {
|
|
565 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);
|
|
566 }
|
|
567
|
|
568 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
569 _mm_comlt_epi16(__m128i __A, __m128i __B)
|
|
570 {
|
|
571 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);
|
|
572 }
|
|
573
|
|
574 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
575 _mm_comle_epi16(__m128i __A, __m128i __B)
|
|
576 {
|
|
577 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);
|
|
578 }
|
|
579
|
|
580 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
581 _mm_comgt_epi16(__m128i __A, __m128i __B)
|
|
582 {
|
|
583 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);
|
|
584 }
|
|
585
|
|
586 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
587 _mm_comge_epi16(__m128i __A, __m128i __B)
|
|
588 {
|
|
589 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);
|
|
590 }
|
|
591
|
|
592 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
593 _mm_comeq_epi16(__m128i __A, __m128i __B)
|
|
594 {
|
|
595 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);
|
|
596 }
|
|
597
|
|
598 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
599 _mm_comneq_epi16(__m128i __A, __m128i __B)
|
|
600 {
|
|
601 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);
|
|
602 }
|
|
603
|
|
604 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
605 _mm_comfalse_epi16(__m128i __A, __m128i __B)
|
|
606 {
|
|
607 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);
|
|
608 }
|
|
609
|
|
610 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
611 _mm_comtrue_epi16(__m128i __A, __m128i __B)
|
|
612 {
|
|
613 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);
|
|
614 }
|
|
615
|
|
616 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
617 _mm_comlt_epi32(__m128i __A, __m128i __B)
|
|
618 {
|
|
619 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);
|
|
620 }
|
|
621
|
|
622 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
623 _mm_comle_epi32(__m128i __A, __m128i __B)
|
|
624 {
|
|
625 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);
|
|
626 }
|
|
627
|
|
628 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
629 _mm_comgt_epi32(__m128i __A, __m128i __B)
|
|
630 {
|
|
631 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);
|
|
632 }
|
|
633
|
|
634 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
635 _mm_comge_epi32(__m128i __A, __m128i __B)
|
|
636 {
|
|
637 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);
|
|
638 }
|
|
639
|
|
640 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
641 _mm_comeq_epi32(__m128i __A, __m128i __B)
|
|
642 {
|
|
643 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);
|
|
644 }
|
|
645
|
|
646 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
647 _mm_comneq_epi32(__m128i __A, __m128i __B)
|
|
648 {
|
|
649 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);
|
|
650 }
|
|
651
|
|
652 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
653 _mm_comfalse_epi32(__m128i __A, __m128i __B)
|
|
654 {
|
|
655 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);
|
|
656 }
|
|
657
|
|
658 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
659 _mm_comtrue_epi32(__m128i __A, __m128i __B)
|
|
660 {
|
|
661 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);
|
|
662 }
|
|
663
|
|
664 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
665 _mm_comlt_epi64(__m128i __A, __m128i __B)
|
|
666 {
|
|
667 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);
|
|
668 }
|
|
669
|
|
670 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
671 _mm_comle_epi64(__m128i __A, __m128i __B)
|
|
672 {
|
|
673 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);
|
|
674 }
|
|
675
|
|
676 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
677 _mm_comgt_epi64(__m128i __A, __m128i __B)
|
|
678 {
|
|
679 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);
|
|
680 }
|
|
681
|
|
682 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
683 _mm_comge_epi64(__m128i __A, __m128i __B)
|
|
684 {
|
|
685 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);
|
|
686 }
|
|
687
|
|
688 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
689 _mm_comeq_epi64(__m128i __A, __m128i __B)
|
|
690 {
|
|
691 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);
|
|
692 }
|
|
693
|
|
694 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
695 _mm_comneq_epi64(__m128i __A, __m128i __B)
|
|
696 {
|
|
697 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);
|
|
698 }
|
|
699
|
|
700 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
701 _mm_comfalse_epi64(__m128i __A, __m128i __B)
|
|
702 {
|
|
703 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);
|
|
704 }
|
|
705
|
|
706 static __inline__ __m128i __DEFAULT_FN_ATTRS
|
|
707 _mm_comtrue_epi64(__m128i __A, __m128i __B)
|
|
708 {
|
|
709 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
|
|
710 }
|
|
711
|
|
/* Expands to __builtin_ia32_vpermil2pd applied to X and Y (viewed as
   __v2df), C (viewed as __v2di) and I, cast to __m128d. The whole expansion
   is parenthesized so the leading cast cannot be separated from the call by
   a neighbouring postfix operator. */
#define _mm_permute2_pd(X, Y, C, I) \
  ((__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), \
                                      (__v2di)(__m128i)(C), (I)))
|
|
716
|
|
/* Expands to __builtin_ia32_vpermil2pd256 applied to X and Y (viewed as
   __v4df), C (viewed as __v4di) and I, cast to __m256d. The whole expansion
   is parenthesized so the leading cast cannot be separated from the call by
   a neighbouring postfix operator. */
#define _mm256_permute2_pd(X, Y, C, I) \
  ((__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \
                                         (__v4df)(__m256d)(Y), \
                                         (__v4di)(__m256i)(C), (I)))
|
|
721
|
|
/* Expands to __builtin_ia32_vpermil2ps applied to X and Y (viewed as
   __v4sf), C (viewed as __v4si) and I, cast to __m128. The whole expansion
   is parenthesized so the leading cast cannot be separated from the call by
   a neighbouring postfix operator. */
#define _mm_permute2_ps(X, Y, C, I) \
  ((__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), \
                                     (__v4sf)(__m128)(Y), \
                                     (__v4si)(__m128i)(C), (I)))
|
|
725
|
|
/* Expands to __builtin_ia32_vpermil2ps256 applied to X and Y (viewed as
   __v8sf), C (viewed as __v8si) and I, cast to __m256. The whole expansion
   is parenthesized so the leading cast cannot be separated from the call by
   a neighbouring postfix operator. */
#define _mm256_permute2_ps(X, Y, C, I) \
  ((__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \
                                        (__v8sf)(__m256)(Y), \
                                        (__v8si)(__m256i)(C), (I)))
|
|
730
|
|
731 static __inline__ __m128 __DEFAULT_FN_ATTRS
|
|
732 _mm_frcz_ss(__m128 __A)
|
|
733 {
|
|
734 return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);
|
|
735 }
|
|
736
|
|
737 static __inline__ __m128d __DEFAULT_FN_ATTRS
|
|
738 _mm_frcz_sd(__m128d __A)
|
|
739 {
|
|
740 return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);
|
|
741 }
|
|
742
|
|
743 static __inline__ __m128 __DEFAULT_FN_ATTRS
|
|
744 _mm_frcz_ps(__m128 __A)
|
|
745 {
|
|
746 return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
|
|
747 }
|
|
748
|
|
749 static __inline__ __m128d __DEFAULT_FN_ATTRS
|
|
750 _mm_frcz_pd(__m128d __A)
|
|
751 {
|
|
752 return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
|
|
753 }
|
|
754
|
|
755 static __inline__ __m256 __DEFAULT_FN_ATTRS256
|
|
756 _mm256_frcz_ps(__m256 __A)
|
|
757 {
|
|
758 return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
|
|
759 }
|
|
760
|
|
761 static __inline__ __m256d __DEFAULT_FN_ATTRS256
|
|
762 _mm256_frcz_pd(__m256d __A)
|
|
763 {
|
|
764 return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
|
|
765 }
|
|
766
|
|
767 #undef __DEFAULT_FN_ATTRS
|
|
768 #undef __DEFAULT_FN_ATTRS256
|
|
769
|
|
770 #endif /* __XOPINTRIN_H */
|