150
|
1 /*===---- immintrin.h - Intel intrinsics -----------------------------------===
|
|
2 *
|
|
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 * See https://llvm.org/LICENSE.txt for license information.
|
|
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 *
|
|
7 *===-----------------------------------------------------------------------===
|
|
8 */
|
|
9
|
|
10 #ifndef __IMMINTRIN_H
|
|
11 #define __IMMINTRIN_H
|
|
12
|
173
|
13 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
14 defined(__MMX__)
|
150
|
15 #include <mmintrin.h>
|
|
16 #endif
|
|
17
|
173
|
18 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
19 defined(__SSE__)
|
150
|
20 #include <xmmintrin.h>
|
|
21 #endif
|
|
22
|
173
|
23 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
24 defined(__SSE2__)
|
150
|
25 #include <emmintrin.h>
|
|
26 #endif
|
|
27
|
173
|
28 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
29 defined(__SSE3__)
|
150
|
30 #include <pmmintrin.h>
|
|
31 #endif
|
|
32
|
173
|
33 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
34 defined(__SSSE3__)
|
150
|
35 #include <tmmintrin.h>
|
|
36 #endif
|
|
37
|
173
|
38 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
150
|
39 (defined(__SSE4_2__) || defined(__SSE4_1__))
|
|
40 #include <smmintrin.h>
|
|
41 #endif
|
|
42
|
173
|
43 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
150
|
44 (defined(__AES__) || defined(__PCLMUL__))
|
|
45 #include <wmmintrin.h>
|
|
46 #endif
|
|
47
|
173
|
48 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
49 defined(__CLFLUSHOPT__)
|
150
|
50 #include <clflushoptintrin.h>
|
|
51 #endif
|
|
52
|
173
|
53 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
54 defined(__CLWB__)
|
150
|
55 #include <clwbintrin.h>
|
|
56 #endif
|
|
57
|
173
|
58 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
59 defined(__AVX__)
|
150
|
60 #include <avxintrin.h>
|
|
61 #endif
|
|
62
|
173
|
63 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
64 defined(__AVX2__)
|
150
|
65 #include <avx2intrin.h>
|
|
66 #endif
|
|
67
|
173
|
68 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
69 defined(__F16C__)
|
150
|
70 #include <f16cintrin.h>
|
|
71 #endif
|
|
72
|
173
|
73 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
74 defined(__VPCLMULQDQ__)
|
150
|
75 #include <vpclmulqdqintrin.h>
|
|
76 #endif
|
|
77
|
|
78 /* No feature check desired due to internal checks */
|
|
79 #include <bmiintrin.h>
|
|
80
|
173
|
81 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
82 defined(__BMI2__)
|
150
|
83 #include <bmi2intrin.h>
|
|
84 #endif
|
|
85
|
173
|
86 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
87 defined(__LZCNT__)
|
150
|
88 #include <lzcntintrin.h>
|
|
89 #endif
|
|
90
|
173
|
91 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
92 defined(__POPCNT__)
|
150
|
93 #include <popcntintrin.h>
|
|
94 #endif
|
|
95
|
173
|
96 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
97 defined(__FMA__)
|
150
|
98 #include <fmaintrin.h>
|
|
99 #endif
|
|
100
|
173
|
101 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
102 defined(__AVX512F__)
|
150
|
103 #include <avx512fintrin.h>
|
|
104 #endif
|
|
105
|
173
|
106 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
107 defined(__AVX512VL__)
|
150
|
108 #include <avx512vlintrin.h>
|
|
109 #endif
|
|
110
|
173
|
111 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
112 defined(__AVX512BW__)
|
150
|
113 #include <avx512bwintrin.h>
|
|
114 #endif
|
|
115
|
173
|
116 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
117 defined(__AVX512BITALG__)
|
150
|
118 #include <avx512bitalgintrin.h>
|
|
119 #endif
|
|
120
|
173
|
121 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
122 defined(__AVX512CD__)
|
150
|
123 #include <avx512cdintrin.h>
|
|
124 #endif
|
|
125
|
173
|
126 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
127 defined(__AVX512VPOPCNTDQ__)
|
150
|
128 #include <avx512vpopcntdqintrin.h>
|
|
129 #endif
|
|
130
|
173
|
131 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
150
|
132 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
|
|
133 #include <avx512vpopcntdqvlintrin.h>
|
|
134 #endif
|
|
135
|
173
|
136 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
137 defined(__AVX512VNNI__)
|
150
|
138 #include <avx512vnniintrin.h>
|
|
139 #endif
|
|
140
|
173
|
141 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
150
|
142 (defined(__AVX512VL__) && defined(__AVX512VNNI__))
|
|
143 #include <avx512vlvnniintrin.h>
|
|
144 #endif
|
|
145
|
173
|
146 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
147 defined(__AVX512DQ__)
|
150
|
148 #include <avx512dqintrin.h>
|
|
149 #endif
|
|
150
|
173
|
151 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
150
|
152 (defined(__AVX512VL__) && defined(__AVX512BITALG__))
|
|
153 #include <avx512vlbitalgintrin.h>
|
|
154 #endif
|
|
155
|
173
|
156 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
150
|
157 (defined(__AVX512VL__) && defined(__AVX512BW__))
|
|
158 #include <avx512vlbwintrin.h>
|
|
159 #endif
|
|
160
|
173
|
161 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
150
|
162 (defined(__AVX512VL__) && defined(__AVX512CD__))
|
|
163 #include <avx512vlcdintrin.h>
|
|
164 #endif
|
|
165
|
173
|
166 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
150
|
167 (defined(__AVX512VL__) && defined(__AVX512DQ__))
|
|
168 #include <avx512vldqintrin.h>
|
|
169 #endif
|
|
170
|
173
|
171 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
172 defined(__AVX512ER__)
|
150
|
173 #include <avx512erintrin.h>
|
|
174 #endif
|
|
175
|
173
|
176 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
177 defined(__AVX512IFMA__)
|
150
|
178 #include <avx512ifmaintrin.h>
|
|
179 #endif
|
|
180
|
173
|
181 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
150
|
182 (defined(__AVX512IFMA__) && defined(__AVX512VL__))
|
|
183 #include <avx512ifmavlintrin.h>
|
|
184 #endif
|
|
185
|
173
|
186 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
187 defined(__AVX512VBMI__)
|
150
|
188 #include <avx512vbmiintrin.h>
|
|
189 #endif
|
|
190
|
173
|
191 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
150
|
192 (defined(__AVX512VBMI__) && defined(__AVX512VL__))
|
|
193 #include <avx512vbmivlintrin.h>
|
|
194 #endif
|
|
195
|
173
|
196 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
197 defined(__AVX512VBMI2__)
|
150
|
198 #include <avx512vbmi2intrin.h>
|
|
199 #endif
|
|
200
|
173
|
201 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
150
|
202 (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
|
|
203 #include <avx512vlvbmi2intrin.h>
|
|
204 #endif
|
|
205
|
173
|
206 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
207 defined(__AVX512PF__)
|
150
|
208 #include <avx512pfintrin.h>
|
|
209 #endif
|
|
210
|
173
|
211 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
212 defined(__AVX512BF16__)
|
150
|
213 #include <avx512bf16intrin.h>
|
|
214 #endif
|
|
215
|
173
|
216 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
150
|
217 (defined(__AVX512VL__) && defined(__AVX512BF16__))
|
|
218 #include <avx512vlbf16intrin.h>
|
|
219 #endif
|
|
220
|
173
|
221 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
222 defined(__PKU__)
|
150
|
223 #include <pkuintrin.h>
|
|
224 #endif
|
|
225
|
173
|
226 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
227 defined(__VAES__)
|
150
|
228 #include <vaesintrin.h>
|
|
229 #endif
|
|
230
|
173
|
231 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
232 defined(__GFNI__)
|
150
|
233 #include <gfniintrin.h>
|
|
234 #endif
|
|
235
|
173
|
236 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
237 defined(__RDPID__)
|
150
|
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The 32-bit value produced by the RDPID builtin.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void)
{
  unsigned int __aux = __builtin_ia32_rdpid();
  return __aux;
}
|
|
247 #endif // __RDPID__
|
|
248
|
173
|
249 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
250 defined(__RDRND__)
|
150
|
/// Requests a 16-bit hardware random value and stores it through \a __p.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Destination for the 16-bit value written by the builtin.
/// \returns The status reported by the builtin (per Intel's RDRAND
///    documentation: 1 if a random value was produced, 0 otherwise).
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p) {
  int __status = __builtin_ia32_rdrand16_step(__p);
  return __status;
}
|
|
256
|
|
/// Requests a 32-bit hardware random value and stores it through \a __p.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Destination for the 32-bit value written by the builtin.
/// \returns The status reported by the builtin (per Intel's RDRAND
///    documentation: 1 if a random value was produced, 0 otherwise).
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p) {
  int __status = __builtin_ia32_rdrand32_step(__p);
  return __status;
}
|
|
262
|
|
263 #ifdef __x86_64__
|
|
/// Requests a 64-bit hardware random value and stores it through \a __p.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Destination for the 64-bit value written by the builtin.
/// \returns The status reported by the builtin (per Intel's RDRAND
///    documentation: 1 if a random value was produced, 0 otherwise).
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p) {
  int __status = __builtin_ia32_rdrand64_step(__p);
  return __status;
}
|
|
269 #endif
|
|
270 #endif /* __RDRND__ */
|
|
271
|
173
|
272 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
273 defined(__FSGSBASE__)
|
150
|
274 #ifdef __x86_64__
|
|
/// Reads the FS base register using the 32-bit form of the builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The 32-bit value produced by the RDFSBASE builtin.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void) {
  unsigned int __base = __builtin_ia32_rdfsbase32();
  return __base;
}
|
|
280
|
|
/// Reads the FS base register using the 64-bit form of the builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The 64-bit value produced by the RDFSBASE builtin.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void) {
  unsigned long long __base = __builtin_ia32_rdfsbase64();
  return __base;
}
|
|
286
|
|
/// Reads the GS base register using the 32-bit form of the builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The 32-bit value produced by the RDGSBASE builtin.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void) {
  unsigned int __base = __builtin_ia32_rdgsbase32();
  return __base;
}
|
|
292
|
|
/// Reads the GS base register using the 64-bit form of the builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The 64-bit value produced by the RDGSBASE builtin.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void) {
  unsigned long long __base = __builtin_ia32_rdgsbase64();
  return __base;
}
|
|
298
|
|
/// Writes the FS base register using the 32-bit form of the builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    32-bit value passed to the WRFSBASE builtin.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V) {
  __builtin_ia32_wrfsbase32(__V);
}
|
|
304
|
|
/// Writes the FS base register using the 64-bit form of the builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    64-bit value passed to the WRFSBASE builtin.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrfsbase64(__V);
}
|
|
310
|
|
/// Writes the GS base register using the 32-bit form of the builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    32-bit value passed to the WRGSBASE builtin.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V) {
  __builtin_ia32_wrgsbase32(__V);
}
|
|
316
|
|
/// Writes the GS base register using the 64-bit form of the builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    64-bit value passed to the WRGSBASE builtin.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrgsbase64(__V);
}
|
|
322
|
|
323 #endif
|
|
324 #endif /* __FSGSBASE__ */
|
|
325
|
173
|
326 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
327 defined(__MOVBE__)
|
150
|
328
|
|
329 /* The structs used below are to force the load/store to be unaligned. This
|
|
330 * is accomplished with the __packed__ attribute. The __may_alias__ prevents
|
|
331 * tbaa metadata from being generated based on the struct and the type of the
|
|
332 * field inside of it.
|
|
333 */
|
|
334
|
|
/// Loads a 16-bit value from \a __P and returns it with its bytes swapped.
///
/// The load goes through a packed, may_alias wrapper struct so that \a __P
/// need not be aligned and no type-based alias metadata is attached.
///
/// \param __P
///    Address of the 16-bit value to load; may be unaligned.
/// \returns The loaded value after a 16-bit byte swap.
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  struct __loadu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i16 *__src = (const struct __loadu_i16 *)__P;
  return __builtin_bswap16(__src->__v);
}
|
|
342
|
|
/// Byte-swaps the 16-bit value \a __D and stores the result at \a __P.
///
/// The store goes through a packed, may_alias wrapper struct so that \a __P
/// need not be aligned and no type-based alias metadata is attached.
///
/// \param __P
///    Destination address; may be unaligned.
/// \param __D
///    Value to byte-swap and store.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  struct __storeu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i16 *__dst = (struct __storeu_i16 *)__P;
  __dst->__v = __builtin_bswap16(__D);
}
|
|
350
|
|
/// Loads a 32-bit value from \a __P and returns it with its bytes swapped.
///
/// The load goes through a packed, may_alias wrapper struct so that \a __P
/// need not be aligned and no type-based alias metadata is attached.
///
/// \param __P
///    Address of the 32-bit value to load; may be unaligned.
/// \returns The loaded value after a 32-bit byte swap.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  struct __loadu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i32 *__src = (const struct __loadu_i32 *)__P;
  return __builtin_bswap32(__src->__v);
}
|
|
358
|
|
/// Byte-swaps the 32-bit value \a __D and stores the result at \a __P.
///
/// The store goes through a packed, may_alias wrapper struct so that \a __P
/// need not be aligned and no type-based alias metadata is attached.
///
/// \param __P
///    Destination address; may be unaligned.
/// \param __D
///    Value to byte-swap and store.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  struct __storeu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i32 *__dst = (struct __storeu_i32 *)__P;
  __dst->__v = __builtin_bswap32(__D);
}
|
|
366
|
|
367 #ifdef __x86_64__
|
|
/// Loads a 64-bit value from \a __P and returns it with its bytes swapped.
///
/// The load goes through a packed, may_alias wrapper struct so that \a __P
/// need not be aligned and no type-based alias metadata is attached.
///
/// \param __P
///    Address of the 64-bit value to load; may be unaligned.
/// \returns The loaded value after a 64-bit byte swap.
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  struct __loadu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i64 *__src = (const struct __loadu_i64 *)__P;
  return __builtin_bswap64(__src->__v);
}
|
|
375
|
|
/// Byte-swaps the 64-bit value \a __D and stores the result at \a __P.
///
/// The store goes through a packed, may_alias wrapper struct so that \a __P
/// need not be aligned and no type-based alias metadata is attached.
///
/// \param __P
///    Destination address; may be unaligned.
/// \param __D
///    Value to byte-swap and store.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  struct __storeu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i64 *__dst = (struct __storeu_i64 *)__P;
  __dst->__v = __builtin_bswap64(__D);
}
|
|
383 #endif
|
|
384 #endif /* __MOVBE__ */
|
|
385
|
173
|
386 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
387 defined(__RTM__)
|
150
|
388 #include <rtmintrin.h>
|
|
389 #include <xtestintrin.h>
|
|
390 #endif
|
|
391
|
173
|
392 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
393 defined(__SHA__)
|
150
|
394 #include <shaintrin.h>
|
|
395 #endif
|
|
396
|
173
|
397 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
398 defined(__FXSR__)
|
150
|
399 #include <fxsrintrin.h>
|
|
400 #endif
|
|
401
|
|
402 /* No feature check desired due to internal MSC_VER checks */
|
|
403 #include <xsaveintrin.h>
|
|
404
|
173
|
405 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
406 defined(__XSAVEOPT__)
|
150
|
407 #include <xsaveoptintrin.h>
|
|
408 #endif
|
|
409
|
173
|
410 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
411 defined(__XSAVEC__)
|
150
|
412 #include <xsavecintrin.h>
|
|
413 #endif
|
|
414
|
173
|
415 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
416 defined(__XSAVES__)
|
150
|
417 #include <xsavesintrin.h>
|
|
418 #endif
|
|
419
|
173
|
420 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
421 defined(__SHSTK__)
|
150
|
422 #include <cetintrin.h>
|
|
423 #endif
|
|
424
|
|
425 /* Some intrinsics inside adxintrin.h are available only on processors with ADX,
|
|
426 * whereas others are available at all times. */
|
|
427 #include <adxintrin.h>
|
|
428
|
173
|
429 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
430 defined(__RDSEED__)
|
150
|
431 #include <rdseedintrin.h>
|
|
432 #endif
|
|
433
|
173
|
434 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
435 defined(__WBNOINVD__)
|
150
|
436 #include <wbnoinvdintrin.h>
|
|
437 #endif
|
|
438
|
173
|
439 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
440 defined(__CLDEMOTE__)
|
150
|
441 #include <cldemoteintrin.h>
|
|
442 #endif
|
|
443
|
173
|
444 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
445 defined(__WAITPKG__)
|
150
|
446 #include <waitpkgintrin.h>
|
|
447 #endif
|
|
448
|
173
|
449 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
450 defined(__MOVDIRI__) || defined(__MOVDIR64B__)
|
150
|
451 #include <movdirintrin.h>
|
|
452 #endif
|
|
453
|
173
|
454 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
455 defined(__PCONFIG__)
|
150
|
456 #include <pconfigintrin.h>
|
|
457 #endif
|
|
458
|
173
|
459 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
460 defined(__SGX__)
|
150
|
461 #include <sgxintrin.h>
|
|
462 #endif
|
|
463
|
173
|
464 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
465 defined(__PTWRITE__)
|
150
|
466 #include <ptwriteintrin.h>
|
|
467 #endif
|
|
468
|
173
|
469 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
470 defined(__INVPCID__)
|
150
|
471 #include <invpcidintrin.h>
|
|
472 #endif
|
|
473
|
173
|
474 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
475 defined(__AVX512VP2INTERSECT__)
|
150
|
476 #include <avx512vp2intersectintrin.h>
|
|
477 #endif
|
|
478
|
173
|
479 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
480 (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
|
150
|
481 #include <avx512vlvp2intersectintrin.h>
|
|
482 #endif
|
|
483
|
173
|
484 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
485 defined(__ENQCMD__)
|
150
|
486 #include <enqcmdintrin.h>
|
|
487 #endif
|
|
488
|
173
|
489 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
490 defined(__SERIALIZE__)
|
|
491 #include <serializeintrin.h>
|
|
492 #endif
|
|
493
|
|
494 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
495 defined(__TSXLDTRK__)
|
|
496 #include <tsxldtrkintrin.h>
|
|
497 #endif
|
|
498
|
150
|
499 #if defined(_MSC_VER) && __has_extension(gnu_asm)
|
|
500 /* Define the default attributes for these intrinsics */
|
|
501 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
|
|
502 #ifdef __cplusplus
|
|
503 extern "C" {
|
|
504 #endif
|
|
505 /*----------------------------------------------------------------------------*\
|
|
506 |* Interlocked Exchange HLE
|
|
507 \*----------------------------------------------------------------------------*/
|
|
508 #if defined(__i386__) || defined(__x86_64__)
|
|
509 static __inline__ long __DEFAULT_FN_ATTRS
|
|
510 _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
|
|
511 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
|
|
512 : "+r" (_Value), "+m" (*_Target) :: "memory");
|
|
513 return _Value;
|
|
514 }
|
|
515 static __inline__ long __DEFAULT_FN_ATTRS
|
|
516 _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
|
|
517 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
|
|
518 : "+r" (_Value), "+m" (*_Target) :: "memory");
|
|
519 return _Value;
|
|
520 }
|
|
521 #endif
|
|
522 #if defined(__x86_64__)
|
|
523 static __inline__ __int64 __DEFAULT_FN_ATTRS
|
|
524 _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
|
|
525 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
|
|
526 : "+r" (_Value), "+m" (*_Target) :: "memory");
|
|
527 return _Value;
|
|
528 }
|
|
529 static __inline__ __int64 __DEFAULT_FN_ATTRS
|
|
530 _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
|
|
531 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
|
|
532 : "+r" (_Value), "+m" (*_Target) :: "memory");
|
|
533 return _Value;
|
|
534 }
|
|
535 #endif
|
|
536 /*----------------------------------------------------------------------------*\
|
|
537 |* Interlocked Compare Exchange HLE
|
|
538 \*----------------------------------------------------------------------------*/
|
|
539 #if defined(__i386__) || defined(__x86_64__)
|
|
540 static __inline__ long __DEFAULT_FN_ATTRS
|
|
541 _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
|
|
542 long _Exchange, long _Comparand) {
|
|
543 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
|
|
544 : "+a" (_Comparand), "+m" (*_Destination)
|
|
545 : "r" (_Exchange) : "memory");
|
|
546 return _Comparand;
|
|
547 }
|
|
548 static __inline__ long __DEFAULT_FN_ATTRS
|
|
549 _InterlockedCompareExchange_HLERelease(long volatile *_Destination,
|
|
550 long _Exchange, long _Comparand) {
|
|
551 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
|
|
552 : "+a" (_Comparand), "+m" (*_Destination)
|
|
553 : "r" (_Exchange) : "memory");
|
|
554 return _Comparand;
|
|
555 }
|
|
556 #endif
|
|
557 #if defined(__x86_64__)
|
|
558 static __inline__ __int64 __DEFAULT_FN_ATTRS
|
|
559 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
|
|
560 __int64 _Exchange, __int64 _Comparand) {
|
|
561 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
|
|
562 : "+a" (_Comparand), "+m" (*_Destination)
|
|
563 : "r" (_Exchange) : "memory");
|
|
564 return _Comparand;
|
|
565 }
|
|
566 static __inline__ __int64 __DEFAULT_FN_ATTRS
|
|
567 _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
|
|
568 __int64 _Exchange, __int64 _Comparand) {
|
|
569 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
|
|
570 : "+a" (_Comparand), "+m" (*_Destination)
|
|
571 : "r" (_Exchange) : "memory");
|
|
572 return _Comparand;
|
|
573 }
|
|
574 #endif
|
|
575 #ifdef __cplusplus
|
|
576 }
|
|
577 #endif
|
|
578
|
|
579 #undef __DEFAULT_FN_ATTRS
|
|
580
|
|
581 #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
|
|
582
|
|
583 #endif /* __IMMINTRIN_H */
|