150
|
1 /* ===-------- ia32intrin.h ---------------------------------------------------===
|
|
2 *
|
|
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 * See https://llvm.org/LICENSE.txt for license information.
|
|
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 *
|
|
7 *===-----------------------------------------------------------------------===
|
|
8 */
|
|
9
|
|
10 #ifndef __X86INTRIN_H
|
|
11 #error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead."
|
|
12 #endif
|
|
13
|
|
14 #ifndef __IA32INTRIN_H
|
|
15 #define __IA32INTRIN_H
|
|
16
|
|
/** Scans a 32-bit value from its least significant bit upward and reports
 *  the position of the first bit that is set. The result is undefined when
 *  the input is 0.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> BSF </c> instruction or the
 * <c> TZCNT </c> instruction.
 *
 * \param __A
 *    A 32-bit integer operand.
 * \returns A 32-bit integer containing the bit number.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsfd(int __A) {
  /* The builtin takes an unsigned int; spell the conversion out. */
  return __builtin_ctz((unsigned int)__A);
}
|
|
33
|
|
/** Scans a 32-bit value from its most significant bit downward and reports
 *  the position of the first bit that is set. The result is undefined when
 *  the input is 0.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> BSR </c> instruction or the
 * <c> LZCNT </c> instruction and an <c> XOR </c>.
 *
 * \param __A
 *    A 32-bit integer operand.
 * \returns A 32-bit integer containing the bit number.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsrd(int __A) {
  /* Number of zero bits above the highest set bit. */
  int __leading = __builtin_clz((unsigned int)__A);
  /* Turn the leading-zero count into a bit index counted from bit 0. */
  return 31 - __leading;
}
|
|
50
|
|
/** Reverses the byte order of a 32-bit value, converting between little
 *  endian and big endian representations.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 * \param __A
 *    A 32-bit integer operand.
 * \returns A 32-bit integer containing the swapped bytes.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bswapd(int __A) {
  /* The builtin works on an unsigned 32-bit value; convert in and out
   * explicitly. */
  return (int)__builtin_bswap32((unsigned int)__A);
}
|
|
66
|
|
/** Reverses the byte order of a 32-bit value.
 *
 * Performs the same <c> __builtin_bswap32 </c> operation as
 * <c> __bswapd </c>, under a second name.
 *
 * \param __A
 *    A 32-bit integer operand.
 * \returns A 32-bit integer containing the swapped bytes.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bswap(int __A) {
  return (int)__builtin_bswap32((unsigned int)__A);
}
|
|
71
|
|
/* Alternative spellings that forward to the 32-bit bit-scan functions. */
#define _bit_scan_forward(A) __bsfd(A)
#define _bit_scan_reverse(A) __bsrd(A)
|
|
74
|
|
75 #ifdef __x86_64__
|
|
/** Scans a 64-bit value from its least significant bit upward and reports
 *  the position of the first bit that is set. The result is undefined when
 *  the input is 0.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> BSF </c> instruction or the
 * <c> TZCNT </c> instruction.
 *
 * \param __A
 *    A 64-bit integer operand.
 * \returns A 32-bit integer containing the bit number.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsfq(long long __A) {
  /* The builtin takes an unsigned long long; spell the conversion out. */
  return __builtin_ctzll((unsigned long long)__A);
}
|
|
92
|
|
/** Scans a 64-bit value from its most significant bit downward and reports
 *  the position of the first bit that is set. The result is undefined when
 *  the input is 0.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> BSR </c> instruction or the
 * <c> LZCNT </c> instruction and an <c> XOR </c>.
 *
 * \param __A
 *    A 64-bit integer operand.
 * \returns A 32-bit integer containing the bit number.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsrq(long long __A) {
  /* Number of zero bits above the highest set bit. */
  int __leading = __builtin_clzll((unsigned long long)__A);
  /* Turn the leading-zero count into a bit index counted from bit 0. */
  return 63 - __leading;
}
|
|
109
|
|
/** Reverses the byte order of a 64-bit value, converting between little
 *  endian and big endian representations.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 * \param __A
 *    A 64-bit integer operand.
 * \returns A 64-bit integer containing the swapped bytes.
 */
static __inline__ long long __attribute__((__always_inline__, __nodebug__))
__bswapq(long long __A) {
  /* The builtin works on an unsigned 64-bit value; convert in and out
   * explicitly. */
  return (long long)__builtin_bswap64((unsigned long long)__A);
}

/* Alternative spelling that forwards to __bswapq. */
#define _bswap64(A) __bswapq(A)
|
|
127 #endif
|
|
128
|
|
/** Counts how many bits of the 32-bit source operand are set to 1.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> POPCNT </c> instruction or a
 * sequence of arithmetic and logic ops to calculate it.
 *
 * \param __A
 *    An unsigned 32-bit integer operand.
 * \returns A 32-bit integer containing the number of bits with value 1 in the
 *    source operand.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__popcntd(unsigned int __A) {
  int __ones = __builtin_popcount(__A);
  return __ones;
}

/* Alternative spelling that forwards to __popcntd. */
#define _popcnt32(A) __popcntd(A)
|
|
148
|
|
149 #ifdef __x86_64__
|
|
/** Counts how many bits of the 64-bit source operand are set to 1.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> POPCNT </c> instruction or a
 * sequence of arithmetic and logic ops to calculate it.
 *
 * \param __A
 *    An unsigned 64-bit integer operand.
 * \returns A 64-bit integer containing the number of bits with value 1 in the
 *    source operand.
 */
static __inline__ long long __attribute__((__always_inline__, __nodebug__))
__popcntq(unsigned long long __A) {
  /* The builtin yields an int; widen it to the declared return type. */
  long long __ones = __builtin_popcountll(__A);
  return __ones;
}

/* Alternative spelling that forwards to __popcntq. */
#define _popcnt64(A) __popcntq(A)
|
|
169 #endif /* __x86_64__ */
|
|
170
|
|
#ifdef __x86_64__
/** Reads the processor flags register using the 64-bit builtin.
 *
 * \returns The 64-bit value produced by
 *    <c> __builtin_ia32_readeflags_u64 </c>.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__readeflags(void) {
  unsigned long long __flags = __builtin_ia32_readeflags_u64();
  return __flags;
}

/** Writes the processor flags register using the 64-bit builtin.
 *
 * \param __f
 *    The 64-bit value handed to <c> __builtin_ia32_writeeflags_u64 </c>.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__writeeflags(unsigned long long __f) {
  __builtin_ia32_writeeflags_u64(__f);
}

#else /* !__x86_64__ */
/** Reads the processor flags register using the 32-bit builtin.
 *
 * \returns The 32-bit value produced by
 *    <c> __builtin_ia32_readeflags_u32 </c>.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__readeflags(void) {
  unsigned int __flags = __builtin_ia32_readeflags_u32();
  return __flags;
}

/** Writes the processor flags register using the 32-bit builtin.
 *
 * \param __f
 *    The 32-bit value handed to <c> __builtin_ia32_writeeflags_u32 </c>.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__writeeflags(unsigned int __f) {
  __builtin_ia32_writeeflags_u32(__f);
}
#endif /* !__x86_64__ */
|
|
197
|
|
/** Reinterprets the bits of a 32-bit float value as a 32-bit unsigned
 *  integer. The object representation is copied unchanged; no numeric
 *  conversion takes place.
 *
 * \headerfile <x86intrin.h>
 * This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in x86_64,
 * and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
 *
 * \param __A
 *    A 32-bit float value.
 * \returns a 32-bit unsigned integer holding the same bit pattern as \a __A.
 */
static __inline__ unsigned int __attribute__((__always_inline__))
_castf32_u32(float __A) {
  /* Only reserved (double-underscore) names are used for locals so that a
   * user macro defined before this header is included cannot collide. */
  unsigned int __bits;
  __builtin_memcpy(&__bits, &__A, sizeof(__bits));
  return __bits;
}
|
|
214
|
|
/** Reinterprets the bits of a 64-bit float value as a 64-bit unsigned
 *  integer. The object representation is copied unchanged; no numeric
 *  conversion takes place.
 *
 * \headerfile <x86intrin.h>
 * This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
 * and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
 *
 * \param __A
 *    A 64-bit float value.
 * \returns a 64-bit unsigned integer holding the same bit pattern as \a __A.
 */
static __inline__ unsigned long long __attribute__((__always_inline__))
_castf64_u64(double __A) {
  /* Only reserved (double-underscore) names are used for locals so that a
   * user macro defined before this header is included cannot collide. */
  unsigned long long __bits;
  __builtin_memcpy(&__bits, &__A, sizeof(__bits));
  return __bits;
}
|
|
231
|
|
/** Reinterprets the bits of a 32-bit unsigned integer as a 32-bit float
 *  value. The object representation is copied unchanged; no numeric
 *  conversion takes place.
 *
 * \headerfile <x86intrin.h>
 * This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
 * and corresponds to the <c> FLDS </c> instruction in ia32.
 * NOTE(review): for a 32-bit operand <c> VMOVD / MOVD </c> would be expected
 * on x86_64 -- confirm against generated code.
 *
 * \param __A
 *    A 32-bit unsigned integer value.
 * \returns a 32-bit float value holding the same bit pattern as \a __A.
 */
static __inline__ float __attribute__((__always_inline__))
_castu32_f32(unsigned int __A) {
  /* Only reserved (double-underscore) names are used for locals so that a
   * user macro defined before this header is included cannot collide. */
  float __value;
  __builtin_memcpy(&__value, &__A, sizeof(__value));
  return __value;
}
|
|
248
|
|
/** Reinterprets the bits of a 64-bit unsigned integer as a 64-bit float
 *  value. The object representation is copied unchanged; no numeric
 *  conversion takes place.
 *
 * \headerfile <x86intrin.h>
 * This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
 * and corresponds to the <c> FLDL </c> instruction in ia32.
 *
 * \param __A
 *    A 64-bit unsigned integer value.
 * \returns a 64-bit float value holding the same bit pattern as \a __A.
 */
static __inline__ double __attribute__((__always_inline__))
_castu64_f64(unsigned long long __A) {
  /* Only reserved (double-underscore) names are used for locals so that a
   * user macro defined before this header is included cannot collide. */
  double __value;
  __builtin_memcpy(&__value, &__A, sizeof(__value));
  return __value;
}
|
|
265
|
|
/** Adds the unsigned integer operand \a __C to the CRC-32C checksum of the
 *  unsigned 8-bit operand \a __D.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> CRC32B </c> instruction.
 *
 * \param __C
 *    An unsigned integer operand to add to the CRC-32C checksum of operand
 *    \a __D.
 * \param __D
 *    An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
 * \returns The result of adding operand \a __C to the CRC-32C checksum of
 *    operand \a __D.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32b(unsigned int __C, unsigned char __D) {
  unsigned int __crc = __builtin_ia32_crc32qi(__C, __D);
  return __crc;
}
|
|
286
|
|
/** Adds the unsigned integer operand \a __C to the CRC-32C checksum of the
 *  unsigned 16-bit operand \a __D.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> CRC32W </c> instruction.
 *
 * \param __C
 *    An unsigned integer operand to add to the CRC-32C checksum of operand
 *    \a __D.
 * \param __D
 *    An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
 * \returns The result of adding operand \a __C to the CRC-32C checksum of
 *    operand \a __D.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32w(unsigned int __C, unsigned short __D) {
  unsigned int __crc = __builtin_ia32_crc32hi(__C, __D);
  return __crc;
}
|
|
307
|
|
/** Adds the unsigned integer operand \a __C to the CRC-32C checksum of the
 *  unsigned 32-bit operand \a __D.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> CRC32D </c> instruction.
 *
 * \param __C
 *    An unsigned integer operand to add to the CRC-32C checksum of operand
 *    \a __D.
 * \param __D
 *    An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
 * \returns The result of adding operand \a __C to the CRC-32C checksum of
 *    operand \a __D.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32d(unsigned int __C, unsigned int __D) {
  unsigned int __crc = __builtin_ia32_crc32si(__C, __D);
  return __crc;
}
|
|
328
|
|
329 #ifdef __x86_64__
|
|
/** Adds the unsigned integer operand \a __C to the CRC-32C checksum of the
 *  unsigned 64-bit operand \a __D.
 *
 * \headerfile <x86intrin.h>
 *
 * This intrinsic corresponds to the <c> CRC32Q </c> instruction.
 *
 * \param __C
 *    An unsigned integer operand to add to the CRC-32C checksum of operand
 *    \a __D.
 * \param __D
 *    An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
 * \returns The result of adding operand \a __C to the CRC-32C checksum of
 *    operand \a __D.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32q(unsigned long long __C, unsigned long long __D) {
  unsigned long long __crc = __builtin_ia32_crc32di(__C, __D);
  return __crc;
}
|
|
350 #endif /* __x86_64__ */
|
|
351
|
|
/** Reads a performance-monitoring counter through the
 *  <c> __builtin_ia32_rdpmc </c> builtin.
 *
 * \param __A
 *    The counter selector handed to the builtin.
 * \returns The 64-bit value returned by the builtin.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rdpmc(int __A) {
  unsigned long long __counter = __builtin_ia32_rdpmc(__A);
  return __counter;
}
|
|
356
|
|
/** Reads the time-stamp counter through the <c> __builtin_ia32_rdtscp </c>
 *  builtin.
 *
 * \param __A
 *    Pointer to an unsigned int that is handed to the builtin.
 *    NOTE(review): presumably receives the processor's TSC_AUX value --
 *    confirm against the RDTSCP instruction reference.
 * \returns The 64-bit value returned by the builtin.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rdtscp(unsigned int *__A) {
  unsigned long long __ticks = __builtin_ia32_rdtscp(__A);
  return __ticks;
}
|
|
362
|
|
/* Single-underscore spellings that forward to the double-underscore
 * counter-reading intrinsics. */
#define _rdpmc(A) __rdpmc(A)

#define _rdtsc() __rdtsc()
|
|
366
|
|
/** Invokes the <c> __builtin_ia32_wbinvd </c> builtin. Takes no arguments
 *  and returns nothing.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_wbinvd(void)
{
  __builtin_ia32_wbinvd();
}
|
|
371
|
|
372 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
|
|
373 __rolb(unsigned char __X, int __C) {
|
|
374 return __builtin_rotateleft8(__X, __C);
|
|
375 }
|
|
376
|
|
377 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
|
|
378 __rorb(unsigned char __X, int __C) {
|
|
379 return __builtin_rotateright8(__X, __C);
|
|
380 }
|
|
381
|
|
382 static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
|
|
383 __rolw(unsigned short __X, int __C) {
|
|
384 return __builtin_rotateleft16(__X, __C);
|
|
385 }
|
|
386
|
|
387 static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
|
|
388 __rorw(unsigned short __X, int __C) {
|
|
389 return __builtin_rotateright16(__X, __C);
|
|
390 }
|
|
391
|
|
392 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
|
|
393 __rold(unsigned int __X, int __C) {
|
|
394 return __builtin_rotateleft32(__X, __C);
|
|
395 }
|
|
396
|
|
397 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
|
|
398 __rord(unsigned int __X, int __C) {
|
|
399 return __builtin_rotateright32(__X, __C);
|
|
400 }
|
|
401
|
|
402 #ifdef __x86_64__
|
|
403 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
|
|
404 __rolq(unsigned long long __X, int __C) {
|
|
405 return __builtin_rotateleft64(__X, __C);
|
|
406 }
|
|
407
|
|
408 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
|
|
409 __rorq(unsigned long long __X, int __C) {
|
|
410 return __builtin_rotateright64(__X, __C);
|
|
411 }
|
|
412 #endif /* __x86_64__ */
|
|
413
|
|
#ifndef _MSC_VER
/* MSVC already provides these rotates as builtins, so they are only defined
 * here for other compilers. */
#define _rotl(a,b) __rold((a), (b))
#define _rotr(a,b) __rord((a), (b))
/* The "long" rotates use the 64-bit or the 32-bit function depending on the
 * size of long. */
#ifdef __LP64__
#define _lrotl(a,b) __rolq((a), (b))
#define _lrotr(a,b) __rorq((a), (b))
#else
#define _lrotl(a,b) __rold((a), (b))
#define _lrotr(a,b) __rord((a), (b))
#endif
#endif /* !_MSC_VER */
|
|
427
|
|
/* The 16-bit rotates are not builtins, so they have to be provided in every
 * mode. */
#define _rotwl(a,b) __rolw((a), (b))
#define _rotwr(a,b) __rorw((a), (b))
|
|
431
|
|
432 #endif /* __IA32INTRIN_H */
|