111
|
1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
|
145
|
2 Copyright (C) 1991-2020 Free Software Foundation, Inc.
|
111
|
3
|
|
4 This file is part of the GNU C Library.
|
|
5
|
|
6 The GNU C Library is free software; you can redistribute it and/or
|
|
7 modify it under the terms of the GNU Lesser General Public
|
|
8 License as published by the Free Software Foundation; either
|
|
9 version 2.1 of the License, or (at your option) any later version.
|
|
10
|
|
11 In addition to the permissions in the GNU Lesser General Public
|
|
12 License, the Free Software Foundation gives you unlimited
|
|
13 permission to link the compiled version of this file into
|
|
14 combinations with other programs, and to distribute those
|
|
15 combinations without any restriction coming from the use of this
|
|
16 file. (The Lesser General Public License restrictions do apply in
|
|
17 other respects; for example, they cover modification of the file,
|
|
18 and distribution when not linked into a combine executable.)
|
|
19
|
|
20 The GNU C Library is distributed in the hope that it will be useful,
|
|
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
23 Lesser General Public License for more details.
|
|
24
|
|
25 You should have received a copy of the GNU Lesser General Public
|
|
26 License along with the GNU C Library; if not, see
|
|
27 <http://www.gnu.org/licenses/>. */
|
|
28
|
|
29 /* You have to define the following before including this file:
|
|
30
|
|
31 UWtype -- An unsigned type, default type for operations (typically a "word")
|
|
32 UHWtype -- An unsigned type, at least half the size of UWtype.
|
|
33 UDWtype -- An unsigned type, at least twice as large as UWtype
|
|
34 W_TYPE_SIZE -- size in bits of UWtype
|
|
35
|
|
36 UQItype -- Unsigned 8 bit type.
|
|
37 SItype, USItype -- Signed and unsigned 32 bit types.
|
|
38 DItype, UDItype -- Signed and unsigned 64 bit types.
|
|
39
|
|
40 On a 32 bit machine UWtype should typically be USItype;
|
|
41 on a 64 bit machine, UWtype should typically be UDItype. */
|
|
42
|
|
/* Half-word helpers.
   __BITS4        -- a quarter of the word size, in bits.
   __ll_B         -- 2 raised to half the word size, as a UWtype.
   __ll_lowpart   -- T converted to UWtype, masked to its low half.
   __ll_highpart  -- T converted to UWtype, shifted down by half a word.
   These expand lazily, so it is fine that W_TYPE_SIZE and UWtype only
   receive their defaults further down.  */
#define __BITS4 (W_TYPE_SIZE / 4)
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))

/* Default to a 32-bit word when the includer did not pick a size.
   USItype and UDItype must still be provided by the includer.  */
#ifndef W_TYPE_SIZE
#define W_TYPE_SIZE 32
#define UWtype USItype
#define UHWtype USItype
#define UDWtype UDItype
#endif
|
|
54
|
|
55 /* Used in glibc only. */
|
|
56 #ifndef attribute_hidden
|
|
57 #define attribute_hidden
|
|
58 #endif
|
|
59
|
|
60 extern const UQItype __clz_tab[256] attribute_hidden;
|
|
61
|
|
62 /* Define auxiliary asm macros.
|
|
63
|
|
64 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
|
|
65 UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
|
|
66 word product in HIGH_PROD and LOW_PROD.
|
|
67
|
|
68 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
|
|
69 UDWtype product. This is just a variant of umul_ppmm.
|
|
70
|
|
71 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
|
|
72 denominator) divides a UDWtype, composed by the UWtype integers
|
|
73 HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
|
|
74 in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
|
|
75 than DENOMINATOR for correct operation. If, in addition, the most
|
|
76 significant bit of DENOMINATOR must be 1, then the pre-processor symbol
|
|
77 UDIV_NEEDS_NORMALIZATION is defined to 1.
|
|
78
|
|
79 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
|
|
80 denominator). Like udiv_qrnnd but the numbers are signed. The quotient
|
|
81 is rounded towards 0.
|
|
82
|
|
83 5) count_leading_zeros(count, x) counts the number of zero-bits from the
|
|
84 msb to the first nonzero bit in the UWtype X. This is the number of
|
|
85 steps X needs to be shifted left to set the msb. Undefined for X == 0,
|
|
86 unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
|
|
87
|
|
88 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
|
|
89 from the least significant end.
|
|
90
|
|
91 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
|
|
92 high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
|
|
93 HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
|
|
94 respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
|
|
95 (i.e. carry out) is not stored anywhere, and is lost.
|
|
96
|
|
97 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
|
|
98 high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
|
|
99 composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
|
|
100 LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE
|
|
101 and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
|
|
102 and is lost.
|
|
103
|
|
104 If any of these macros are left undefined for a particular CPU,
|
|
105 C macros are used. */
|
|
106
|
|
107 /* The CPUs come in alphabetical order below.
|
|
108
|
|
109 Please add support for more CPUs here, or improve the current support
|
|
110 for the CPUs below!
|
|
111 (E.g. WE32100, IBM360.) */
|
|
112
|
|
113 #if defined (__GNUC__) && !defined (NO_ASM)
|
|
114
|
|
/* We sometimes need to clobber "cc" with gcc2, but that would not be
   understood by gcc1.  Use cpp to avoid major code duplication.

   __CLOBBER_CC is appended directly after the input operand list and
   opens the clobber list itself; __AND_CLOBBER_CC is appended to a
   clobber list that already exists.  */
#if __GNUC__ < 2
#define __CLOBBER_CC
#define __AND_CLOBBER_CC
#else /* __GNUC__ >= 2 */
#define __CLOBBER_CC : "cc"
#define __AND_CLOBBER_CC , "cc"
#endif /* __GNUC__ < 2 */
|
|
124
|
|
/* AArch64: bit counts come from the compiler builtins, using the int
   variants for a 32-bit word and the long long variants for a 64-bit
   word.  COUNT_LEADING_ZEROS_0 is set to the word size.  */
#if defined (__aarch64__)

#if W_TYPE_SIZE == 32
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif /* W_TYPE_SIZE == 32 */

#if W_TYPE_SIZE == 64
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clzll (X))
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctzll (X))
#define COUNT_LEADING_ZEROS_0 64
#endif /* W_TYPE_SIZE == 64 */

#endif /* __aarch64__ */
|
|
140
|
|
/* Alpha, 64-bit word.  */
#if defined (__alpha) && W_TYPE_SIZE == 64
/* There is a bug in g++ before version 5 that
   errors on __builtin_alpha_umulh.  */
#if !defined(__cplusplus) || __GNUC__ >= 5
/* PH:PL = M0 * M1.  Each argument is evaluated exactly once.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDItype __m0 = (m0), __m1 = (m1); \
    (ph) = __builtin_alpha_umulh (__m0, __m1); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 46
#endif /* !c++ */
#ifndef LONGLONG_STANDALONE
/* Division is delegated to the out-of-line helper __udiv_qrnnd, which
   returns the quotient and stores the remainder through its first
   argument.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UDItype __r; \
    (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
    (r) = __r; \
  } while (0)
extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
#define UDIV_TIME 220
#endif /* LONGLONG_STANDALONE */
#ifdef __alpha_cix__
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
#define COUNT_LEADING_ZEROS_0 64
#else
/* Without CIX: cmpbge yields a per-byte mask, __clz_tab turns the
   inverted mask into a byte index, extbl extracts that byte, and a
   second table lookup finishes the count.  */
#define count_leading_zeros(COUNT,X) \
  do { \
    UDItype __xr = (X), __t, __a; \
    __t = __builtin_alpha_cmpbge (0, __xr); \
    __a = __clz_tab[__t ^ 0xff] - 1; \
    __t = __builtin_alpha_extbl (__xr, __a); \
    (COUNT) = 64 - (__clz_tab[__t] + __a*8); \
  } while (0)
/* Same two-level idea from the low end.  After isolating a single set
   bit, its index is rebuilt from the 0xCC / 0xF0 / 0xAA mask tests,
   first for the byte index and then for the bit within that byte.  */
#define count_trailing_zeros(COUNT,X) \
  do { \
    UDItype __xr = (X), __t, __a; \
    __t = __builtin_alpha_cmpbge (0, __xr); \
    __t = ~__t & -~__t; \
    __a = ((__t & 0xCC) != 0) * 2; \
    __a += ((__t & 0xF0) != 0) * 4; \
    __a += ((__t & 0xAA) != 0); \
    __t = __builtin_alpha_extbl (__xr, __a); \
    __a <<= 3; \
    __t &= -__t; \
    __a += ((__t & 0xCC) != 0) * 2; \
    __a += ((__t & 0xF0) != 0) * 4; \
    __a += ((__t & 0xAA) != 0); \
    (COUNT) = __a; \
  } while (0)
#endif /* __alpha_cix__ */
#endif /* __alpha */
|
|
193
|
|
194 #if defined (__arc__) && W_TYPE_SIZE == 32
|
|
/* ARC two-word add: the low words are added with a flag-setting add.f,
   then adc folds the carry into the high words.  SL is early-clobber
   because it is written before the high operands are read.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add.f %1, %4, %5\n\tadc %0, %2, %3" \
	   : "=r" ((USItype) (sh)), \
	     "=&r" ((USItype) (sl)) \
	   : "%r" ((USItype) (ah)), \
	     "rICal" ((USItype) (bh)), \
	     "%r" ((USItype) (al)), \
	     "rICal" ((USItype) (bl)) \
	   : "cc")
/* ARC two-word subtract: sub.f on the low words, sbc on the high
   words.  The operands are not commutative, so no "%" modifier.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub.f %1, %4, %5\n\tsbc %0, %2, %3" \
	   : "=r" ((USItype) (sh)), \
	     "=&r" ((USItype) (sl)) \
	   : "r" ((USItype) (ah)), \
	     "rICal" ((USItype) (bh)), \
	     "r" ((USItype) (al)), \
	     "rICal" ((USItype) (bl)) \
	   : "cc")
|
111
|
213
|
|
/* Widening 32x32->64 unsigned multiply.  Both arguments are
   parenthesized so that expression arguments such as `a + b' are
   converted and multiplied as a whole; without the parentheses the
   casts and the `*' bind to only part of the argument.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
|
|
#ifdef __ARC_NORM__
/* Uses the norm.f instruction; when the MI condition holds afterwards
   the result is replaced by -1, so that the final `+ 1' yields 0.  */
#define count_leading_zeros(count, x) \
  do \
    { \
      SItype c_; \
 \
      __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\
      (count) = c_ + 1; \
    } \
  while (0)
#define COUNT_LEADING_ZEROS_0 32
#endif /* __ARC_NORM__ */
|
|
227 #endif /* __arc__ */
|
|
228
|
|
/* ARM (ARM state or Thumb-2), 32-bit word.  */
#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
 && W_TYPE_SIZE == 32
/* Two-word add: adds sets the carry from the low words, adc consumes
   it for the high words.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("adds %1, %4, %5\n\tadc %0, %2, %3" \
	   : "=r" ((USItype) (sh)), \
	     "=&r" ((USItype) (sl)) \
	   : "%r" ((USItype) (ah)), \
	     "rI" ((USItype) (bh)), \
	     "%r" ((USItype) (al)), \
	     "rI" ((USItype) (bl)) __CLOBBER_CC)
/* Two-word subtract: subs on the low words, sbc on the high words.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subs %1, %4, %5\n\tsbc %0, %2, %3" \
	   : "=r" ((USItype) (sh)), \
	     "=&r" ((USItype) (sl)) \
	   : "r" ((USItype) (ah)), \
	     "rI" ((USItype) (bh)), \
	     "r" ((USItype) (al)), \
	     "rI" ((USItype) (bl)) __CLOBBER_CC)
# if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
     || defined(__ARM_ARCH_3__)
/* Selected for ARM architectures 2, 2A and 3.  The 64-bit product is
   assembled from four 16x16 partial products held in three scratch
   registers.  */
#  define umul_ppmm(xh, xl, a, b) \
  do { \
    register USItype __t0, __t1, __t2; \
    __asm__ ("%@ Inlined umul_ppmm\n" \
	   " mov %2, %5, lsr #16\n" \
	   " mov %0, %6, lsr #16\n" \
	   " bic %3, %5, %2, lsl #16\n" \
	   " bic %4, %6, %0, lsl #16\n" \
	   " mul %1, %3, %4\n" \
	   " mul %4, %2, %4\n" \
	   " mul %3, %0, %3\n" \
	   " mul %0, %2, %0\n" \
	   " adds %3, %4, %3\n" \
	   " addcs %0, %0, #65536\n" \
	   " adds %1, %1, %3, lsl #16\n" \
	   " adc %0, %0, %3, lsr #16" \
	   : "=&r" ((USItype) (xh)), \
	     "=r" ((USItype) (xl)), \
	     "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \
	   : "r" ((USItype) (a)), \
	     "r" ((USItype) (b)) __CLOBBER_CC ); \
  } while (0)
#  define UMUL_TIME 20
# else
#  define umul_ppmm(xh, xl, a, b) \
  do { \
    /* Generate umull, under compiler control.  */ \
    register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b); \
    (xl) = (USItype)__t0; \
    (xh) = (USItype)(__t0 >> 32); \
  } while (0)
#  define UMUL_TIME 3
# endif
# define UDIV_TIME 100
#endif /* __arm__ */
|
|
284
|
|
#if defined(__arm__)
/* Let gcc decide how best to implement count_leading_zeros.
   This applies to every ARM configuration, including the ones that
   the arithmetic section's extra conditions exclude.  */
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif
|
|
291
|
|
/* AVR: choose the builtin by word size -- the int variants for 16
   bits, the long variants for 32 bits, the long long variants for 64
   bits.  COUNT_LEADING_ZEROS_0 is the word size in each case.  */
#if defined (__AVR__)

#if W_TYPE_SIZE == 16
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
#define COUNT_LEADING_ZEROS_0 16
#endif /* W_TYPE_SIZE == 16 */

#if W_TYPE_SIZE == 32
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
#define COUNT_LEADING_ZEROS_0 32
#endif /* W_TYPE_SIZE == 32 */

#if W_TYPE_SIZE == 64
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzll (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
#define COUNT_LEADING_ZEROS_0 64
#endif /* W_TYPE_SIZE == 64 */

#endif /* defined (__AVR__) */
|
|
313
|
|
/* CRIS: what is available depends on __CRIS_arch_version.  */
#if defined (__CRIS__)

#if __CRIS_arch_version >= 3
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif /* __CRIS_arch_version >= 3 */

#if __CRIS_arch_version >= 8
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
#endif /* __CRIS_arch_version >= 8 */

/* From version 10 on the widening multiply is written as plain C;
   otherwise the out-of-line function __umulsidi3 is used.  The
   self-referential #define keeps `#ifdef __umulsidi3' true in both
   cases.  */
#if __CRIS_arch_version >= 10
#define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v))
#else
#define __umulsidi3 __umulsidi3
extern UDItype __umulsidi3 (USItype, USItype);
#endif /* __CRIS_arch_version >= 10 */

/* W1:W0 = U * V, split out of the 64-bit result of __umulsidi3.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UDItype __x = __umulsidi3 (u, v); \
    (w0) = (USItype) (__x); \
    (w1) = (USItype) (__x >> 32); \
  } while (0)

/* FIXME: defining add_ssaaaa and sub_ddmmss should be advantageous for
   DFmode ("double" intrinsics, avoiding two of the three insns handling
   carry), but defining them as open-code C composing and doing the
   operation in DImode (UDImode) shows that the DImode needs work:
   register pressure from requiring neighboring registers and the
   traffic to and from them come to dominate, in the 4.7 series.  */

#endif /* defined (__CRIS__) */
|
|
347
|
|
/* HP PA-RISC, 32-bit word.  */
#if defined (__hppa) && W_TYPE_SIZE == 32
/* Two-word add: add on the low words, addc on the high words.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \
	   : "=r" ((USItype) (sh)), \
	     "=&r" ((USItype) (sl)) \
	   : "%rM" ((USItype) (ah)), \
	     "rM" ((USItype) (bh)), \
	     "%rM" ((USItype) (al)), \
	     "rM" ((USItype) (bl)))
/* Two-word subtract: sub on the low words, subb on the high words.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \
	   : "=r" ((USItype) (sh)), \
	     "=&r" ((USItype) (sl)) \
	   : "rM" ((USItype) (ah)), \
	     "rM" ((USItype) (bh)), \
	     "rM" ((USItype) (al)), \
	     "rM" ((USItype) (bl)))
#if defined (_PA_RISC1_1)
/* xmpyu writes a 64-bit result; the union gives access to its two
   32-bit halves, with __w1 declared first.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    union \
      { \
	UDItype __f; \
	struct {USItype __w1, __w0;} __w1w0; \
      } __t; \
    __asm__ ("xmpyu %1,%2,%0" \
	     : "=x" (__t.__f) \
	     : "x" ((USItype) (u)), \
	       "x" ((USItype) (v))); \
    (w1) = __t.__w1w0.__w1; \
    (w0) = __t.__w1w0.__w0; \
  } while (0)
#define UMUL_TIME 8
#else
#define UMUL_TIME 30
#endif
#define UDIV_TIME 40
/* Binary search over 16/8/4/2-bit fields using nullifying extru
   forms; see the per-instruction comments in the template.  __tmp
   receives a working copy of X (matching constraint "1").  */
#define count_leading_zeros(count, x) \
  do { \
    USItype __tmp; \
    __asm__ ( \
       "ldi 1,%0\n" \
" extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \
" extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n"\
" ldo 16(%0),%0 ; Yes. Perform add.\n" \
" extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \
" extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n"\
" ldo 8(%0),%0 ; Yes. Perform add.\n" \
" extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \
" extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n"\
" ldo 4(%0),%0 ; Yes. Perform add.\n" \
" extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \
" extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n"\
" ldo 2(%0),%0 ; Yes. Perform add.\n" \
" extru %1,30,1,%1 ; Extract bit 1.\n" \
" sub %0,%1,%0 ; Subtract it.\n" \
	: "=r" (count), "=r" (__tmp) : "1" (x)); \
  } while (0)
#endif
|
|
407
|
|
/* IBM 370 / S/390, 32-bit word.  Only the signed multiply and divide
   are provided here.  */
#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
#if !defined (__zarch__)
/* XH:XL = M0 * M1 (signed).  The operand is a 64-bit object; %N0 in
   the template refers to its second register.  */
#define smul_ppmm(xh, xl, m0, m1) \
  do { \
    union {DItype __ll; \
	   struct {USItype __h, __l;} __i; \
	  } __x; \
    __asm__ ("lr %N0,%1\n\tmr %0,%2" \
	     : "=&r" (__x.__ll) \
	     : "r" (m0), "r" (m1)); \
    (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
  } while (0)
/* Signed divide of N1:N0 by D.  The numerator halves are
   parenthesized so that expression arguments are stored intact.
   After dr, the quotient is read from the low half and the remainder
   from the high half.  */
#define sdiv_qrnnd(q, r, n1, n0, d) \
  do { \
    union {DItype __ll; \
	   struct {USItype __h, __l;} __i; \
	  } __x; \
    __x.__i.__h = (n1); __x.__i.__l = (n0); \
    __asm__ ("dr %0,%2" \
	     : "=r" (__x.__ll) \
	     : "0" (__x.__ll), "r" (d)); \
    (q) = __x.__i.__l; (r) = __x.__i.__h; \
  } while (0)
#else
/* On z/Architecture the register pair is pinned explicitly to
   registers 0 and 1 via local register variables.  */
#define smul_ppmm(xh, xl, m0, m1) \
  do { \
    register SItype __r0 __asm__ ("0"); \
    register SItype __r1 __asm__ ("1") = (m0); \
 \
    __asm__ ("mr\t%%r0,%3" \
	     : "=r" (__r0), "=r" (__r1) \
	     : "r" (__r1), "r" (m1)); \
    (xh) = __r0; (xl) = __r1; \
  } while (0)

#define sdiv_qrnnd(q, r, n1, n0, d) \
  do { \
    register SItype __r0 __asm__ ("0") = (n1); \
    register SItype __r1 __asm__ ("1") = (n0); \
 \
    __asm__ ("dr\t%%r0,%4" \
	     : "=r" (__r0), "=r" (__r1) \
	     : "r" (__r0), "r" (__r1), "r" (d)); \
    (q) = __r1; (r) = __r0; \
  } while (0)
#endif /* __zarch__ */
#endif
|
|
455
|
|
/* 32-bit x86.  The {att|intel} braces in the templates select the
   operand order for the assembler dialect in use.  */
#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
/* Two-word add: add on the low words, adc on the high words.  The
   results are tied to AH / AL through matching constraints.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}" \
	   : "=r" ((USItype) (sh)), \
	     "=&r" ((USItype) (sl)) \
	   : "%0" ((USItype) (ah)), \
	     "g" ((USItype) (bh)), \
	     "%1" ((USItype) (al)), \
	     "g" ((USItype) (bl)))
/* Two-word subtract: sub on the low words, sbb on the high words.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}" \
	   : "=r" ((USItype) (sh)), \
	     "=&r" ((USItype) (sl)) \
	   : "0" ((USItype) (ah)), \
	     "g" ((USItype) (bh)), \
	     "1" ((USItype) (al)), \
	     "g" ((USItype) (bl)))
/* W1:W0 = U * V; the outputs are bound to the "d" and "a"
   constraints respectively.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mul{l} %3" \
	   : "=a" ((USItype) (w0)), \
	     "=d" ((USItype) (w1)) \
	   : "%0" ((USItype) (u)), \
	     "rm" ((USItype) (v)))
/* Divide N1:N0 by DV; quotient in "a", remainder in "d".  */
#define udiv_qrnnd(q, r, n1, n0, dv) \
  __asm__ ("div{l} %4" \
	   : "=a" ((USItype) (q)), \
	     "=d" ((USItype) (r)) \
	   : "0" ((USItype) (n0)), \
	     "1" ((USItype) (n1)), \
	     "rm" ((USItype) (dv)))
/* No COUNT_LEADING_ZEROS_0 here, so the result for a zero argument is
   undefined.  */
#define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
#define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
#define UMUL_TIME 40
#define UDIV_TIME 40
#endif /* 80x86 */
|
|
491
|
|
/* x86-64, 64-bit word.  Same shape as the 32-bit x86 section, with
   {q}-suffixed instructions and UDItype operands.  */
#if defined (__x86_64__) && W_TYPE_SIZE == 64
/* Two-word add: add on the low words, adc on the high words.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}" \
	   : "=r" ((UDItype) (sh)), \
	     "=&r" ((UDItype) (sl)) \
	   : "%0" ((UDItype) (ah)), \
	     "rme" ((UDItype) (bh)), \
	     "%1" ((UDItype) (al)), \
	     "rme" ((UDItype) (bl)))
/* Two-word subtract: sub on the low words, sbb on the high words.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}" \
	   : "=r" ((UDItype) (sh)), \
	     "=&r" ((UDItype) (sl)) \
	   : "0" ((UDItype) (ah)), \
	     "rme" ((UDItype) (bh)), \
	     "1" ((UDItype) (al)), \
	     "rme" ((UDItype) (bl)))
/* W1:W0 = U * V; the outputs are bound to the "d" and "a"
   constraints respectively.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mul{q} %3" \
	   : "=a" ((UDItype) (w0)), \
	     "=d" ((UDItype) (w1)) \
	   : "%0" ((UDItype) (u)), \
	     "rm" ((UDItype) (v)))
/* Divide N1:N0 by DV; quotient in "a", remainder in "d".  */
#define udiv_qrnnd(q, r, n1, n0, dv) \
  __asm__ ("div{q} %4" \
	   : "=a" ((UDItype) (q)), \
	     "=d" ((UDItype) (r)) \
	   : "0" ((UDItype) (n0)), \
	     "1" ((UDItype) (n1)), \
	     "rm" ((UDItype) (dv)))
/* No COUNT_LEADING_ZEROS_0 here, so the result for a zero argument is
   undefined.  */
#define count_leading_zeros(count, x) ((count) = __builtin_clzll (x))
#define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x))
#define UMUL_TIME 40
#define UDIV_TIME 40
#endif /* x86_64 */
|
|
527
|
|
/* Intel i960, 32-bit word.  Both macros are GNU statement
   expressions built around the emul instruction.  */
#if defined (__i960__) && W_TYPE_SIZE == 32
/* W1:W0 = U * V.  The union splits the 64-bit result into halves,
   with __l declared first.  */
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll; \
	   struct {USItype __l, __h;} __i; \
	  } __xx; \
  __asm__ ("emul %2,%1,%0" \
	   : "=d" (__xx.__ll) \
	   : "%dI" ((USItype) (u)), \
	     "dI" ((USItype) (v))); \
  (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
/* Evaluates to the full UDItype product of U and V.  */
#define __umulsidi3(u, v) \
  ({UDItype __w; \
    __asm__ ("emul %2,%1,%0" \
	     : "=d" (__w) \
	     : "%dI" ((USItype) (u)), \
	       "dI" ((USItype) (v))); \
    __w; })
#endif /* __i960__ */
|
|
546
|
|
#if defined (__ia64) && W_TYPE_SIZE == 64
/* This form encourages gcc (pre-release 3.4 at least) to emit predicated
   "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
   code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
   register, which takes an extra cycle.

   Note that AL, BL, AH and BH each appear more than once in the
   expansion.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __x; \
    __x = (al) - (bl); \
    if ((al) < (bl)) \
      (sh) = (ah) - (bh) - 1; \
    else \
      (sh) = (ah) - (bh); \
    (sl) = __x; \
  } while (0)

/* Do both product parts in assembly, since that gives better code with
   all gcc versions.  Some callers will just use the upper part, and in
   that situation we waste an instruction, but not any cycles.  */
#define umul_ppmm(ph, pl, m0, m1) \
  __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \
	   : "=&f" (ph), "=f" (pl) \
	   : "f" (m0), "f" (m1))
/* mux1 @rev and czx1.l produce the byte-granular part of the shift
   count; the 4/2/1-bit steps then refine it within that byte.  */
#define count_leading_zeros(count, x) \
  do { \
    UWtype _x = (x), _y, _a, _c; \
    __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \
    __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \
    _c = (_a - 1) << 3; \
    _x >>= _c; \
    if (_x >= 1 << 4) \
      _x >>= 4, _c += 4; \
    if (_x >= 1 << 2) \
      _x >>= 2, _c += 2; \
    _c += _x >> 1; \
    (count) = W_TYPE_SIZE - 1 - _c; \
  } while (0)
/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
   based, and we don't need a special case for x==0 here */
#define count_trailing_zeros(count, x) \
  do { \
    UWtype __ctz_x = (x); \
    __asm__ ("popcnt %0 = %1" \
	     : "=r" (count) \
	     : "r" ((__ctz_x-1) & ~__ctz_x)); \
  } while (0)
#define UMUL_TIME 14
#endif
|
|
595
|
|
/* Renesas M32R, 32-bit word.  Both macros clear the condition bit
   with a self-compare and then chain two addx / subx instructions;
   "cbit" is listed as clobbered.  */
#if defined (__M32R__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  /* The cmp clears the condition bit. */ \
  __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3" \
	   : "=r" ((USItype) (sh)), \
	     "=&r" ((USItype) (sl)) \
	   : "0" ((USItype) (ah)), \
	     "r" ((USItype) (bh)), \
	     "1" ((USItype) (al)), \
	     "r" ((USItype) (bl)) \
	   : "cbit")
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  /* The cmp clears the condition bit. */ \
  __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3" \
	   : "=r" ((USItype) (sh)), \
	     "=&r" ((USItype) (sl)) \
	   : "0" ((USItype) (ah)), \
	     "r" ((USItype) (bh)), \
	     "1" ((USItype) (al)), \
	     "r" ((USItype) (bl)) \
	   : "cbit")
#endif /* __M32R__ */
|
|
618
|
|
/* Motorola 68000 family and ColdFire, 32-bit word.  */
#if defined (__mc68000__) && W_TYPE_SIZE == 32
/* Two-word add: add on the low words, addx on the high words.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \
	   : "=d" ((USItype) (sh)), \
	     "=&d" ((USItype) (sl)) \
	   : "%0" ((USItype) (ah)), \
	     "d" ((USItype) (bh)), \
	     "%1" ((USItype) (al)), \
	     "g" ((USItype) (bl)))
/* Two-word subtract: sub on the low words, subx on the high words.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \
	   : "=d" ((USItype) (sh)), \
	     "=&d" ((USItype) (sl)) \
	   : "0" ((USItype) (ah)), \
	     "d" ((USItype) (bh)), \
	     "1" ((USItype) (al)), \
	     "g" ((USItype) (bl)))

/* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
#if (defined (__mc68020__) && !defined (__mc68060__))
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mulu%.l %3,%1:%0" \
	   : "=d" ((USItype) (w0)), \
	     "=d" ((USItype) (w1)) \
	   : "%0" ((USItype) (u)), \
	     "dmi" ((USItype) (v)))
#define UMUL_TIME 45
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("divu%.l %4,%1:%0" \
	   : "=d" ((USItype) (q)), \
	     "=d" ((USItype) (r)) \
	   : "0" ((USItype) (n0)), \
	     "1" ((USItype) (n1)), \
	     "dmi" ((USItype) (d)))
#define UDIV_TIME 90
#define sdiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("divs%.l %4,%1:%0" \
	   : "=d" ((USItype) (q)), \
	     "=d" ((USItype) (r)) \
	   : "0" ((USItype) (n0)), \
	     "1" ((USItype) (n1)), \
	     "dmi" ((USItype) (d)))

#elif defined (__mcoldfire__) /* not mc68020 */

/* Product built from four 16x16 mulu partial products in d0-d4,
   which are listed as clobbered.  This variant clears the low word
   with clr%.w.  */
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("| Inlined umul_ppmm\n" \
	   " move%.l %2,%/d0\n" \
	   " move%.l %3,%/d1\n" \
	   " move%.l %/d0,%/d2\n" \
	   " swap %/d0\n" \
	   " move%.l %/d1,%/d3\n" \
	   " swap %/d1\n" \
	   " move%.w %/d2,%/d4\n" \
	   " mulu %/d3,%/d4\n" \
	   " mulu %/d1,%/d2\n" \
	   " mulu %/d0,%/d3\n" \
	   " mulu %/d0,%/d1\n" \
	   " move%.l %/d4,%/d0\n" \
	   " clr%.w %/d0\n" \
	   " swap %/d0\n" \
	   " add%.l %/d0,%/d2\n" \
	   " add%.l %/d3,%/d2\n" \
	   " jcc 1f\n" \
	   " add%.l %#65536,%/d1\n" \
	   "1: swap %/d2\n" \
	   " moveq %#0,%/d0\n" \
	   " move%.w %/d2,%/d0\n" \
	   " move%.w %/d4,%/d2\n" \
	   " move%.l %/d2,%1\n" \
	   " add%.l %/d1,%/d0\n" \
	   " move%.l %/d0,%0" \
	   : "=g" ((USItype) (xh)), \
	     "=g" ((USItype) (xl)) \
	   : "g" ((USItype) (a)), \
	     "g" ((USItype) (b)) \
	   : "d0", "d1", "d2", "d3", "d4")
#define UMUL_TIME 100
#define UDIV_TIME 400
#else /* not ColdFire */
/* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
/* Same sequence as the ColdFire variant, except that the low word is
   cleared with eor%.w instead of clr%.w.  */
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("| Inlined umul_ppmm\n" \
	   " move%.l %2,%/d0\n" \
	   " move%.l %3,%/d1\n" \
	   " move%.l %/d0,%/d2\n" \
	   " swap %/d0\n" \
	   " move%.l %/d1,%/d3\n" \
	   " swap %/d1\n" \
	   " move%.w %/d2,%/d4\n" \
	   " mulu %/d3,%/d4\n" \
	   " mulu %/d1,%/d2\n" \
	   " mulu %/d0,%/d3\n" \
	   " mulu %/d0,%/d1\n" \
	   " move%.l %/d4,%/d0\n" \
	   " eor%.w %/d0,%/d0\n" \
	   " swap %/d0\n" \
	   " add%.l %/d0,%/d2\n" \
	   " add%.l %/d3,%/d2\n" \
	   " jcc 1f\n" \
	   " add%.l %#65536,%/d1\n" \
	   "1: swap %/d2\n" \
	   " moveq %#0,%/d0\n" \
	   " move%.w %/d2,%/d0\n" \
	   " move%.w %/d4,%/d2\n" \
	   " move%.l %/d2,%1\n" \
	   " add%.l %/d1,%/d0\n" \
	   " move%.l %/d0,%0" \
	   : "=g" ((USItype) (xh)), \
	     "=g" ((USItype) (xl)) \
	   : "g" ((USItype) (a)), \
	     "g" ((USItype) (b)) \
	   : "d0", "d1", "d2", "d3", "d4")
#define UMUL_TIME 100
#define UDIV_TIME 400

#endif /* not mc68020 */

/* The '020, '030, '040 and '060 have bitfield insns.
   cpu32 disguises as a 68020, but lacks them.  */
#if defined (__mc68020__) && !defined (__mcpu32__)
#define count_leading_zeros(count, x) \
  __asm__ ("bfffo %1{%b2:%b2},%0" \
	   : "=d" ((USItype) (count)) \
	   : "od" ((USItype) (x)), "n" (0))
/* Some ColdFire architectures have a ff1 instruction supported via
   __builtin_clz.  */
#elif defined (__mcfisaaplus__) || defined (__mcfisac__)
#define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
#define COUNT_LEADING_ZEROS_0 32
#endif
#endif /* mc68000 */
|
|
751
|
|
/* Motorola 88000, 32-bit word.  */
#if defined (__m88000__) && W_TYPE_SIZE == 32
/* Two-word add: addu.co on the low words, addu.ci on the high
   words.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
	   : "=r" ((USItype) (sh)), \
	     "=&r" ((USItype) (sl)) \
	   : "%rJ" ((USItype) (ah)), \
	     "rJ" ((USItype) (bh)), \
	     "%rJ" ((USItype) (al)), \
	     "rJ" ((USItype) (bl)))
/* Two-word subtract: subu.co on the low words, subu.ci on the high
   words.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
	   : "=r" ((USItype) (sh)), \
	     "=&r" ((USItype) (sl)) \
	   : "rJ" ((USItype) (ah)), \
	     "rJ" ((USItype) (bh)), \
	     "rJ" ((USItype) (al)), \
	     "rJ" ((USItype) (bl)))
/* The result of ff1 is XORed with 31 to turn it into a leading-zero
   count.  */
#define count_leading_zeros(count, x) \
  do { \
    USItype __cbtmp; \
    __asm__ ("ff1 %0,%1" \
	     : "=r" (__cbtmp) \
	     : "r" ((USItype) (x))); \
    (count) = __cbtmp ^ 31; \
  } while (0)
#define COUNT_LEADING_ZEROS_0 63 /* sic */
#if defined (__mc88110__)
/* WH:WL = U * V via mulu.d; the union splits the 64-bit result, with
   __h declared first.  */
#define umul_ppmm(wh, wl, u, v) \
  do { \
    union {UDItype __ll; \
	   struct {USItype __h, __l;} __i; \
	  } __xx; \
    __asm__ ("mulu.d %0,%1,%2" \
	     : "=r" (__xx.__ll) \
	     : "r" ((USItype) (u)), \
	       "r" ((USItype) (v))); \
    (wh) = __xx.__i.__h; \
    (wl) = __xx.__i.__l; \
  } while (0)
/* divu.d produces only the quotient; the remainder is recomputed as
   N0 - Q * D.  N0 and D are therefore evaluated more than once.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union {UDItype __ll; \
	   struct {USItype __h, __l;} __i; \
	  } __xx; \
  USItype __q; \
  __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
  __asm__ ("divu.d %0,%1,%2" \
	   : "=r" (__q) \
	   : "r" (__xx.__ll), \
	     "r" ((USItype) (d))); \
  (r) = (n0) - __q * (d); (q) = __q; })
#define UMUL_TIME 5
#define UDIV_TIME 25
#else
#define UMUL_TIME 17
#define UDIV_TIME 150
#endif /* __mc88110__ */
#endif /* __m88000__ */
|
|
809
|
|
/* Matsushita MN10300.  The asm statements use the __asm__ spelling,
   like every other section of this file, so that the macros also
   expand correctly under -ansi / -std=c99, where plain `asm' is not a
   keyword.  */
#if defined (__mn10300__)
# if defined (__AM33__)
#  define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
/* AM33: four-operand multiply producing both result halves.  */
#  define umul_ppmm(w1, w0, u, v) \
    __asm__("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
#  define smul_ppmm(w1, w0, u, v) \
    __asm__("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
# else
/* Two-operand multiply: W0 is tied to U through a matching
   constraint and W1 uses the "z" constraint.  */
#  define umul_ppmm(w1, w0, u, v) \
    __asm__("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
#  define smul_ppmm(w1, w0, u, v) \
    __asm__("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
# endif
/* Two-word add and subtract are done in the double-word type through
   DWunion (declared elsewhere; it must provide .ll, .s.low and
   .s.high).  */
# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    DWunion __s, __a, __b; \
    __a.s.low = (al); __a.s.high = (ah); \
    __b.s.low = (bl); __b.s.high = (bh); \
    __s.ll = __a.ll + __b.ll; \
    (sl) = __s.s.low; (sh) = __s.s.high; \
  } while (0)
# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    DWunion __s, __a, __b; \
    __a.s.low = (al); __a.s.high = (ah); \
    __b.s.low = (bl); __b.s.high = (bh); \
    __s.ll = __a.ll - __b.ll; \
    (sl) = __s.s.low; (sh) = __s.s.high; \
  } while (0)
/* Divide NH:NL by D; Q is tied to NL and R to NH through matching
   constraints.  */
# define udiv_qrnnd(q, r, nh, nl, d) \
  __asm__("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define sdiv_qrnnd(q, r, nh, nl, d) \
  __asm__("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define UMUL_TIME 3
# define UDIV_TIME 38
#endif
|
|
846
|
|
847 #if defined (__mips__) && W_TYPE_SIZE == 32
|
|
/* umul_ppmm (w1, w0, u, v): unsigned 32x32 -> 64 bit multiply written in
   plain C.  U and V are truncated to USItype, multiplied in UDItype, and
   the product is split: W1 receives bits 63..32, W0 bits 31..0.  */
#define umul_ppmm(w1, w0, u, v) \
  do {                                                                  \
    USItype __mul_lhs = (USItype) (u);                                  \
    USItype __mul_rhs = (USItype) (v);                                  \
    UDItype __mul_prod = (UDItype) __mul_lhs * __mul_rhs;               \
    (w1) = (USItype) (__mul_prod >> 32);                                \
    (w0) = (USItype) __mul_prod;                                        \
  } while (0)
|
|
854 #define UMUL_TIME 10
|
|
855 #define UDIV_TIME 100
|
|
856
|
|
857 #if (__mips == 32 || __mips == 64) && ! defined (__mips16)
|
|
858 #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
|
|
859 #define COUNT_LEADING_ZEROS_0 32
|
|
860 #endif
|
|
861 #endif /* __mips__ */
|
|
862
|
|
863 /* FIXME: We should test _IBMR2 here when we add assembly support for the
|
|
864 system vendor compilers.
|
|
865 FIXME: What's needed for gcc PowerPC VxWorks? __vxworks__ is not good
|
|
866 enough, since that hits ARM and m68k too. */
|
|
867 #if (defined (_ARCH_PPC) /* AIX */ \
|
|
868 || defined (__powerpc__) /* gcc */ \
|
|
869 || defined (__POWERPC__) /* BEOS */ \
|
|
870 || defined (__ppc__) /* Darwin */ \
|
|
871 || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
|
|
872 || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
|
|
873 && CPU_FAMILY == PPC) \
|
|
874 ) && W_TYPE_SIZE == 32
|
|
875 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
876 do { \
|
|
877 if (__builtin_constant_p (bh) && (bh) == 0) \
|
|
878 __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
|
|
879 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
|
|
880 else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
|
|
881 __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
|
|
882 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
|
|
883 else \
|
|
884 __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
|
|
885 : "=r" (sh), "=&r" (sl) \
|
|
886 : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
|
|
887 } while (0)
|
|
888 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
889 do { \
|
|
890 if (__builtin_constant_p (ah) && (ah) == 0) \
|
|
891 __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
|
|
892 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
|
|
893 else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
|
|
894 __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
|
|
895 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
|
|
896 else if (__builtin_constant_p (bh) && (bh) == 0) \
|
|
897 __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
|
|
898 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
|
|
899 else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
|
|
900 __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
|
|
901 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
|
|
902 else \
|
|
903 __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
|
|
904 : "=r" (sh), "=&r" (sl) \
|
|
905 : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
|
|
906 } while (0)
|
|
907 #define count_leading_zeros(count, x) \
|
|
908 __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
|
|
909 #define COUNT_LEADING_ZEROS_0 32
|
|
910 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
|
|
911 || defined (__ppc__) \
|
|
912 || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
|
|
913 || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
|
|
914 && CPU_FAMILY == PPC)
|
|
/* umul_ppmm (ph, pl, m0, m1): unsigned 32x32 -> 64 bit multiply.
   PH receives the high word, produced by the mulhwu instruction; PL
   receives the low word, produced by an ordinary USItype multiply.
   M0 and M1 are copied into locals once and both the asm and the C
   multiply use those copies, so each argument is evaluated exactly
   once even if it has side effects.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    USItype __m0 = (m0), __m1 = (m1);                                   \
    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
|
|
921 #define UMUL_TIME 15
|
|
/* smul_ppmm (ph, pl, m0, m1): signed 32x32 -> 64 bit multiply.
   PH receives the high word, produced by the mulhw instruction; PL
   receives the low word.
   M0 and M1 are copied into locals once and the asm uses those copies,
   so each argument is evaluated exactly once.
   The low word is computed in USItype: it has the same bit pattern as
   the low word of the signed product, but unsigned arithmetic wraps
   instead of invoking signed-overflow undefined behavior.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    SItype __m0 = (m0), __m1 = (m1);                                    \
    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
    (pl) = (USItype) __m0 * (USItype) __m1;                             \
  } while (0)
|
|
928 #define SMUL_TIME 14
|
|
929 #define UDIV_TIME 120
|
|
930 #endif
|
|
931 #endif /* 32-bit POWER architecture variants. */
|
|
932
|
|
933 /* We should test _IBMR2 here when we add assembly support for the system
|
|
934 vendor compilers. */
|
|
935 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
|
|
936 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
937 do { \
|
|
938 if (__builtin_constant_p (bh) && (bh) == 0) \
|
|
939 __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
|
|
940 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
|
|
941 else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
|
|
942 __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
|
|
943 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
|
|
944 else \
|
|
945 __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
|
|
946 : "=r" (sh), "=&r" (sl) \
|
|
947 : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
|
|
948 } while (0)
|
|
949 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
950 do { \
|
|
951 if (__builtin_constant_p (ah) && (ah) == 0) \
|
|
952 __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
|
|
953 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
|
|
954 else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
|
|
955 __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
|
|
956 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
|
|
957 else if (__builtin_constant_p (bh) && (bh) == 0) \
|
|
958 __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
|
|
959 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
|
|
960 else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
|
|
961 __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
|
|
962 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
|
|
963 else \
|
|
964 __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
|
|
965 : "=r" (sh), "=&r" (sl) \
|
|
966 : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
|
|
967 } while (0)
|
|
968 #define count_leading_zeros(count, x) \
|
|
969 __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
|
|
970 #define COUNT_LEADING_ZEROS_0 64
|
|
/* umul_ppmm (ph, pl, m0, m1): unsigned 64x64 -> 128 bit multiply.
   PH receives the high doubleword, produced by the mulhdu instruction;
   PL receives the low doubleword, produced by an ordinary UDItype
   multiply.
   M0 and M1 are copied into locals once and both the asm and the C
   multiply use those copies, so each argument is evaluated exactly
   once even if it has side effects.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    UDItype __m0 = (m0), __m1 = (m1);                                   \
    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
|
|
977 #define UMUL_TIME 15
|
|
/* smul_ppmm (ph, pl, m0, m1): signed 64x64 -> 128 bit multiply.
   PH receives the high doubleword, produced by the mulhd instruction;
   PL receives the low doubleword.
   M0 and M1 are copied into locals once and the asm uses those copies,
   so each argument is evaluated exactly once.
   The low doubleword is computed in UDItype: it has the same bit
   pattern as the low half of the signed product, but unsigned
   arithmetic wraps instead of invoking signed-overflow undefined
   behavior.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    DItype __m0 = (m0), __m1 = (m1);                                    \
    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
    (pl) = (UDItype) __m0 * (UDItype) __m1;                             \
  } while (0)
|
|
984 #define SMUL_TIME 14 /* ??? */
|
|
985 #define UDIV_TIME 120 /* ??? */
|
|
986 #endif /* 64-bit PowerPC. */
|
|
987
|
|
988 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
|
|
989 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
990 __asm__ ("a %1,%5\n\tae %0,%3" \
|
|
991 : "=r" ((USItype) (sh)), \
|
|
992 "=&r" ((USItype) (sl)) \
|
|
993 : "%0" ((USItype) (ah)), \
|
|
994 "r" ((USItype) (bh)), \
|
|
995 "%1" ((USItype) (al)), \
|
|
996 "r" ((USItype) (bl)))
|
|
997 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
998 __asm__ ("s %1,%5\n\tse %0,%3" \
|
|
999 : "=r" ((USItype) (sh)), \
|
|
1000 "=&r" ((USItype) (sl)) \
|
|
1001 : "0" ((USItype) (ah)), \
|
|
1002 "r" ((USItype) (bh)), \
|
|
1003 "1" ((USItype) (al)), \
|
|
1004 "r" ((USItype) (bl)))
|
|
1005 #define umul_ppmm(ph, pl, m0, m1) \
|
|
1006 do { \
|
|
1007 USItype __m0 = (m0), __m1 = (m1); \
|
|
1008 __asm__ ( \
|
|
1009 "s r2,r2\n" \
|
|
1010 " mts r10,%2\n" \
|
|
1011 " m r2,%3\n" \
|
|
1012 " m r2,%3\n" \
|
|
1013 " m r2,%3\n" \
|
|
1014 " m r2,%3\n" \
|
|
1015 " m r2,%3\n" \
|
|
1016 " m r2,%3\n" \
|
|
1017 " m r2,%3\n" \
|
|
1018 " m r2,%3\n" \
|
|
1019 " m r2,%3\n" \
|
|
1020 " m r2,%3\n" \
|
|
1021 " m r2,%3\n" \
|
|
1022 " m r2,%3\n" \
|
|
1023 " m r2,%3\n" \
|
|
1024 " m r2,%3\n" \
|
|
1025 " m r2,%3\n" \
|
|
1026 " m r2,%3\n" \
|
|
1027 " cas %0,r2,r0\n" \
|
|
1028 " mfs r10,%1" \
|
|
1029 : "=r" ((USItype) (ph)), \
|
|
1030 "=r" ((USItype) (pl)) \
|
|
1031 : "%r" (__m0), \
|
|
1032 "r" (__m1) \
|
|
1033 : "r2"); \
|
|
1034 (ph) += ((((SItype) __m0 >> 31) & __m1) \
|
|
1035 + (((SItype) __m1 >> 31) & __m0)); \
|
|
1036 } while (0)
|
|
1037 #define UMUL_TIME 20
|
|
1038 #define UDIV_TIME 200
|
|
1039 #define count_leading_zeros(count, x) \
|
|
1040 do { \
|
|
1041 if ((x) >= 0x10000) \
|
|
1042 __asm__ ("clz %0,%1" \
|
|
1043 : "=r" ((USItype) (count)) \
|
|
1044 : "r" ((USItype) (x) >> 16)); \
|
|
1045 else \
|
|
1046 { \
|
|
1047 __asm__ ("clz %0,%1" \
|
|
1048 : "=r" ((USItype) (count)) \
|
|
1049 : "r" ((USItype) (x))); \
|
|
1050 (count) += 16; \
|
|
1051 } \
|
|
1052 } while (0)
|
|
1053 #endif
|
|
1054
|
131
|
1055 #if defined(__riscv)
|
|
1056 #ifdef __riscv_mul
|
|
1057 #define __umulsidi3(u,v) ((UDWtype)(UWtype)(u) * (UWtype)(v))
|
|
1058 #define __muluw3(a, b) ((UWtype)(a) * (UWtype)(b))
|
|
1059 #else
|
|
1060 #if __riscv_xlen == 32
|
|
1061 #define MULUW3 "call __mulsi3"
|
|
1062 #elif __riscv_xlen == 64
|
|
1063 #define MULUW3 "call __muldi3"
|
|
1064 #else
|
|
1065 #error unsupport xlen
|
|
1066 #endif /* __riscv_xlen */
|
|
1067 /* We rely on the fact that MULUW3 doesn't clobber the t-registers;
|
|
1068 this lets the compiler achieve a better register allocation. */
|
|
1069 #define __muluw3(a, b) \
|
|
1070 ({ \
|
|
1071 register UWtype __op0 asm ("a0") = a; \
|
|
1072 register UWtype __op1 asm ("a1") = b; \
|
|
1073 asm volatile (MULUW3 \
|
|
1074 : "+r" (__op0), "+r" (__op1) \
|
|
1075 : \
|
|
1076 : "ra", "a2", "a3"); \
|
|
1077 __op0; \
|
|
1078 })
|
|
1079 #endif /* __riscv_mul */
|
|
/* umul_ppmm (w1, w0, u, v): full-width unsigned multiply built from four
   half-word products computed with __muluw3.  W1 receives the high word
   of U*V and W0 the low word.
   U and V are copied into locals first so that each argument is
   evaluated exactly once (they are needed twice: low part and high
   part).  */
#define umul_ppmm(w1, w0, u, v) \
  do {                                                                  \
    UWtype __umul_u = (u), __umul_v = (v);                              \
    UWtype __x0, __x1, __x2, __x3;                                      \
    UHWtype __ul, __vl, __uh, __vh;                                     \
                                                                        \
    __ul = __ll_lowpart (__umul_u);                                     \
    __uh = __ll_highpart (__umul_u);                                    \
    __vl = __ll_lowpart (__umul_v);                                     \
    __vh = __ll_highpart (__umul_v);                                    \
                                                                        \
    __x0 = __muluw3 (__ul, __vl);       /* low  * low  */               \
    __x1 = __muluw3 (__ul, __vh);       /* low  * high */               \
    __x2 = __muluw3 (__uh, __vl);       /* high * low  */               \
    __x3 = __muluw3 (__uh, __vh);       /* high * high */               \
                                                                        \
    __x1 += __ll_highpart (__x0);/* this can't give carry */            \
    __x1 += __x2;               /* but this indeed can */               \
    if (__x1 < __x2)            /* did we get it? */                    \
      __x3 += __ll_B;           /* yes, add it in the proper pos.  */   \
                                                                        \
    (w1) = __x3 + __ll_highpart (__x1);                                 \
    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);          \
  } while (0)
|
|
1103 #endif /* __riscv */
|
|
1104
|
111
|
1105 #if defined(__sh__) && W_TYPE_SIZE == 32
|
|
1106 #ifndef __sh1__
|
|
1107 #define umul_ppmm(w1, w0, u, v) \
|
|
1108 __asm__ ( \
|
|
1109 "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0 mach,%0" \
|
|
1110 : "=r<" ((USItype)(w1)), \
|
|
1111 "=r<" ((USItype)(w0)) \
|
|
1112 : "r" ((USItype)(u)), \
|
|
1113 "r" ((USItype)(v)) \
|
|
1114 : "macl", "mach")
|
|
1115 #define UMUL_TIME 5
|
|
1116 #endif
|
|
1117
|
|
1118 /* This is the same algorithm as __udiv_qrnnd_c. */
|
|
1119 #define UDIV_NEEDS_NORMALIZATION 1
|
|
1120
|
|
1121 #ifdef __FDPIC__
|
|
1122 /* FDPIC needs a special version of the asm fragment to extract the
|
|
1123 code address from the function descriptor. __udiv_qrnnd_16 is
|
|
1124 assumed to be local and not to use the GOT, so loading r12 is
|
|
1125 not needed. */
|
|
1126 #define udiv_qrnnd(q, r, n1, n0, d) \
|
|
1127 do { \
|
|
1128 extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
|
|
1129 __attribute__ ((visibility ("hidden"))); \
|
|
1130 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
|
|
1131 __asm__ ( \
|
|
1132 "mov%M4 %4,r5\n" \
|
|
1133 " swap.w %3,r4\n" \
|
|
1134 " swap.w r5,r6\n" \
|
|
1135 " mov.l @%5,r2\n" \
|
|
1136 " jsr @r2\n" \
|
|
1137 " shll16 r6\n" \
|
|
1138 " swap.w r4,r4\n" \
|
|
1139 " mov.l @%5,r2\n" \
|
|
1140 " jsr @r2\n" \
|
|
1141 " swap.w r1,%0\n" \
|
|
1142 " or r1,%0" \
|
|
1143 : "=r" (q), "=&z" (r) \
|
|
1144 : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
|
|
1145 : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
|
|
1146 } while (0)
|
|
1147 #else
|
|
1148 #define udiv_qrnnd(q, r, n1, n0, d) \
|
|
1149 do { \
|
|
1150 extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
|
|
1151 __attribute__ ((visibility ("hidden"))); \
|
|
1152 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
|
|
1153 __asm__ ( \
|
|
1154 "mov%M4 %4,r5\n" \
|
|
1155 " swap.w %3,r4\n" \
|
|
1156 " swap.w r5,r6\n" \
|
|
1157 " jsr @%5\n" \
|
|
1158 " shll16 r6\n" \
|
|
1159 " swap.w r4,r4\n" \
|
|
1160 " jsr @%5\n" \
|
|
1161 " swap.w r1,%0\n" \
|
|
1162 " or r1,%0" \
|
|
1163 : "=r" (q), "=&z" (r) \
|
|
1164 : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
|
|
1165 : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
|
|
1166 } while (0)
|
|
1167 #endif /* __FDPIC__ */
|
|
1168
|
|
1169 #define UDIV_TIME 80
|
|
1170
|
|
1171 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
1172 __asm__ ("clrt;subc %5,%1; subc %4,%0" \
|
|
1173 : "=r" (sh), "=r" (sl) \
|
|
1174 : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
|
|
1175
|
|
1176 #endif /* __sh__ */
|
|
1177
|
|
1178 #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
|
|
1179 && W_TYPE_SIZE == 32
|
|
1180 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
1181 __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \
|
|
1182 : "=r" ((USItype) (sh)), \
|
|
1183 "=&r" ((USItype) (sl)) \
|
|
1184 : "%rJ" ((USItype) (ah)), \
|
|
1185 "rI" ((USItype) (bh)), \
|
|
1186 "%rJ" ((USItype) (al)), \
|
|
1187 "rI" ((USItype) (bl)) \
|
|
1188 __CLOBBER_CC)
|
|
1189 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
1190 __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \
|
|
1191 : "=r" ((USItype) (sh)), \
|
|
1192 "=&r" ((USItype) (sl)) \
|
|
1193 : "rJ" ((USItype) (ah)), \
|
|
1194 "rI" ((USItype) (bh)), \
|
|
1195 "rJ" ((USItype) (al)), \
|
|
1196 "rI" ((USItype) (bl)) \
|
|
1197 __CLOBBER_CC)
|
|
1198 #if defined (__sparc_v9__)
|
|
1199 #define umul_ppmm(w1, w0, u, v) \
|
|
1200 do { \
|
|
1201 register USItype __g1 asm ("g1"); \
|
|
1202 __asm__ ("umul\t%2,%3,%1\n\t" \
|
|
1203 "srlx\t%1, 32, %0" \
|
|
1204 : "=r" ((USItype) (w1)), \
|
|
1205 "=r" (__g1) \
|
|
1206 : "r" ((USItype) (u)), \
|
|
1207 "r" ((USItype) (v))); \
|
|
1208 (w0) = __g1; \
|
|
1209 } while (0)
|
|
1210 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
|
|
1211 __asm__ ("mov\t%2,%%y\n\t" \
|
|
1212 "udiv\t%3,%4,%0\n\t" \
|
|
1213 "umul\t%0,%4,%1\n\t" \
|
|
1214 "sub\t%3,%1,%1" \
|
|
1215 : "=&r" ((USItype) (__q)), \
|
|
1216 "=&r" ((USItype) (__r)) \
|
|
1217 : "r" ((USItype) (__n1)), \
|
|
1218 "r" ((USItype) (__n0)), \
|
|
1219 "r" ((USItype) (__d)))
|
|
1220 #else
|
|
1221 #if defined (__sparc_v8__)
|
|
1222 #define umul_ppmm(w1, w0, u, v) \
|
|
1223 __asm__ ("umul %2,%3,%1;rd %%y,%0" \
|
|
1224 : "=r" ((USItype) (w1)), \
|
|
1225 "=r" ((USItype) (w0)) \
|
|
1226 : "r" ((USItype) (u)), \
|
|
1227 "r" ((USItype) (v)))
|
|
1228 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
|
|
1229 __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
|
|
1230 : "=&r" ((USItype) (__q)), \
|
|
1231 "=&r" ((USItype) (__r)) \
|
|
1232 : "r" ((USItype) (__n1)), \
|
|
1233 "r" ((USItype) (__n0)), \
|
|
1234 "r" ((USItype) (__d)))
|
|
1235 #else
|
|
1236 #if defined (__sparclite__)
|
|
1237 /* This has hardware multiply but not divide. It also has two additional
|
|
1238 instructions scan (ffs from high bit) and divscc. */
|
|
1239 #define umul_ppmm(w1, w0, u, v) \
|
|
1240 __asm__ ("umul %2,%3,%1;rd %%y,%0" \
|
|
1241 : "=r" ((USItype) (w1)), \
|
|
1242 "=r" ((USItype) (w0)) \
|
|
1243 : "r" ((USItype) (u)), \
|
|
1244 "r" ((USItype) (v)))
|
|
1245 #define udiv_qrnnd(q, r, n1, n0, d) \
|
|
1246 __asm__ ("! Inlined udiv_qrnnd\n" \
|
|
1247 " wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
|
|
1248 " tst %%g0\n" \
|
|
1249 " divscc %3,%4,%%g1\n" \
|
|
1250 " divscc %%g1,%4,%%g1\n" \
|
|
1251 " divscc %%g1,%4,%%g1\n" \
|
|
1252 " divscc %%g1,%4,%%g1\n" \
|
|
1253 " divscc %%g1,%4,%%g1\n" \
|
|
1254 " divscc %%g1,%4,%%g1\n" \
|
|
1255 " divscc %%g1,%4,%%g1\n" \
|
|
1256 " divscc %%g1,%4,%%g1\n" \
|
|
1257 " divscc %%g1,%4,%%g1\n" \
|
|
1258 " divscc %%g1,%4,%%g1\n" \
|
|
1259 " divscc %%g1,%4,%%g1\n" \
|
|
1260 " divscc %%g1,%4,%%g1\n" \
|
|
1261 " divscc %%g1,%4,%%g1\n" \
|
|
1262 " divscc %%g1,%4,%%g1\n" \
|
|
1263 " divscc %%g1,%4,%%g1\n" \
|
|
1264 " divscc %%g1,%4,%%g1\n" \
|
|
1265 " divscc %%g1,%4,%%g1\n" \
|
|
1266 " divscc %%g1,%4,%%g1\n" \
|
|
1267 " divscc %%g1,%4,%%g1\n" \
|
|
1268 " divscc %%g1,%4,%%g1\n" \
|
|
1269 " divscc %%g1,%4,%%g1\n" \
|
|
1270 " divscc %%g1,%4,%%g1\n" \
|
|
1271 " divscc %%g1,%4,%%g1\n" \
|
|
1272 " divscc %%g1,%4,%%g1\n" \
|
|
1273 " divscc %%g1,%4,%%g1\n" \
|
|
1274 " divscc %%g1,%4,%%g1\n" \
|
|
1275 " divscc %%g1,%4,%%g1\n" \
|
|
1276 " divscc %%g1,%4,%%g1\n" \
|
|
1277 " divscc %%g1,%4,%%g1\n" \
|
|
1278 " divscc %%g1,%4,%%g1\n" \
|
|
1279 " divscc %%g1,%4,%%g1\n" \
|
|
1280 " divscc %%g1,%4,%0\n" \
|
|
1281 " rd %%y,%1\n" \
|
|
1282 " bl,a 1f\n" \
|
|
1283 " add %1,%4,%1\n" \
|
|
1284 "1: ! End of inline udiv_qrnnd" \
|
|
1285 : "=r" ((USItype) (q)), \
|
|
1286 "=r" ((USItype) (r)) \
|
|
1287 : "r" ((USItype) (n1)), \
|
|
1288 "r" ((USItype) (n0)), \
|
|
1289 "rI" ((USItype) (d)) \
|
|
1290 : "g1" __AND_CLOBBER_CC)
|
|
1291 #define UDIV_TIME 37
|
|
1292 #define count_leading_zeros(count, x) \
|
|
1293 do { \
|
|
1294 __asm__ ("scan %1,1,%0" \
|
|
1295 : "=r" ((USItype) (count)) \
|
|
1296 : "r" ((USItype) (x))); \
|
|
1297 } while (0)
|
|
1298 /* Early sparclites return 63 for an argument of 0, but they warn that future
|
|
1299 implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0
|
|
1300 undefined. */
|
|
1301 #else
|
|
1302 /* SPARC without integer multiplication and divide instructions.
|
|
1303 (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
|
|
1304 #define umul_ppmm(w1, w0, u, v) \
|
|
1305 __asm__ ("! Inlined umul_ppmm\n" \
|
|
1306 " wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n"\
|
|
1307 " sra %3,31,%%o5 ! Don't move this insn\n" \
|
|
1308 " and %2,%%o5,%%o5 ! Don't move this insn\n" \
|
|
1309 " andcc %%g0,0,%%g1 ! Don't move this insn\n" \
|
|
1310 " mulscc %%g1,%3,%%g1\n" \
|
|
1311 " mulscc %%g1,%3,%%g1\n" \
|
|
1312 " mulscc %%g1,%3,%%g1\n" \
|
|
1313 " mulscc %%g1,%3,%%g1\n" \
|
|
1314 " mulscc %%g1,%3,%%g1\n" \
|
|
1315 " mulscc %%g1,%3,%%g1\n" \
|
|
1316 " mulscc %%g1,%3,%%g1\n" \
|
|
1317 " mulscc %%g1,%3,%%g1\n" \
|
|
1318 " mulscc %%g1,%3,%%g1\n" \
|
|
1319 " mulscc %%g1,%3,%%g1\n" \
|
|
1320 " mulscc %%g1,%3,%%g1\n" \
|
|
1321 " mulscc %%g1,%3,%%g1\n" \
|
|
1322 " mulscc %%g1,%3,%%g1\n" \
|
|
1323 " mulscc %%g1,%3,%%g1\n" \
|
|
1324 " mulscc %%g1,%3,%%g1\n" \
|
|
1325 " mulscc %%g1,%3,%%g1\n" \
|
|
1326 " mulscc %%g1,%3,%%g1\n" \
|
|
1327 " mulscc %%g1,%3,%%g1\n" \
|
|
1328 " mulscc %%g1,%3,%%g1\n" \
|
|
1329 " mulscc %%g1,%3,%%g1\n" \
|
|
1330 " mulscc %%g1,%3,%%g1\n" \
|
|
1331 " mulscc %%g1,%3,%%g1\n" \
|
|
1332 " mulscc %%g1,%3,%%g1\n" \
|
|
1333 " mulscc %%g1,%3,%%g1\n" \
|
|
1334 " mulscc %%g1,%3,%%g1\n" \
|
|
1335 " mulscc %%g1,%3,%%g1\n" \
|
|
1336 " mulscc %%g1,%3,%%g1\n" \
|
|
1337 " mulscc %%g1,%3,%%g1\n" \
|
|
1338 " mulscc %%g1,%3,%%g1\n" \
|
|
1339 " mulscc %%g1,%3,%%g1\n" \
|
|
1340 " mulscc %%g1,%3,%%g1\n" \
|
|
1341 " mulscc %%g1,%3,%%g1\n" \
|
|
1342 " mulscc %%g1,0,%%g1\n" \
|
|
1343 " add %%g1,%%o5,%0\n" \
|
|
1344 " rd %%y,%1" \
|
|
1345 : "=r" ((USItype) (w1)), \
|
|
1346 "=r" ((USItype) (w0)) \
|
|
1347 : "%rI" ((USItype) (u)), \
|
|
1348 "r" ((USItype) (v)) \
|
|
1349 : "g1", "o5" __AND_CLOBBER_CC)
|
|
1350 #define UMUL_TIME 39 /* 39 instructions */
|
|
1351 /* It's quite necessary to add this much assembler for the sparc.
|
|
1352 The default udiv_qrnnd (in C) is more than 10 times slower! */
|
|
1353 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
|
|
1354 __asm__ ("! Inlined udiv_qrnnd\n" \
|
|
1355 " mov 32,%%g1\n" \
|
|
1356 " subcc %1,%2,%%g0\n" \
|
|
1357 "1: bcs 5f\n" \
|
|
1358 " addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
|
|
1359 " sub %1,%2,%1 ! this kills msb of n\n" \
|
|
1360 " addx %1,%1,%1 ! so this can't give carry\n" \
|
|
1361 " subcc %%g1,1,%%g1\n" \
|
|
1362 "2: bne 1b\n" \
|
|
1363 " subcc %1,%2,%%g0\n" \
|
|
1364 " bcs 3f\n" \
|
|
1365 " addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
|
|
1366 " b 3f\n" \
|
|
1367 " sub %1,%2,%1 ! this kills msb of n\n" \
|
|
1368 "4: sub %1,%2,%1\n" \
|
|
1369 "5: addxcc %1,%1,%1\n" \
|
|
1370 " bcc 2b\n" \
|
|
1371 " subcc %%g1,1,%%g1\n" \
|
|
1372 "! Got carry from n. Subtract next step to cancel this carry.\n" \
|
|
1373 " bne 4b\n" \
|
|
1374 " addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n" \
|
|
1375 " sub %1,%2,%1\n" \
|
|
1376 "3: xnor %0,0,%0\n" \
|
|
1377 " ! End of inline udiv_qrnnd" \
|
|
1378 : "=&r" ((USItype) (__q)), \
|
|
1379 "=&r" ((USItype) (__r)) \
|
|
1380 : "r" ((USItype) (__d)), \
|
|
1381 "1" ((USItype) (__n1)), \
|
|
1382 "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
|
|
1383 #define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */
|
|
1384 #endif /* __sparclite__ */
|
|
1385 #endif /* __sparc_v8__ */
|
|
1386 #endif /* __sparc_v9__ */
|
|
1387 #endif /* sparc32 */
|
|
1388
|
|
1389 #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
|
|
1390 && W_TYPE_SIZE == 64
|
|
1391 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
1392 do { \
|
|
1393 UDItype __carry = 0; \
|
|
1394 __asm__ ("addcc\t%r5,%6,%1\n\t" \
|
|
1395 "add\t%r3,%4,%0\n\t" \
|
|
1396 "movcs\t%%xcc, 1, %2\n\t" \
|
|
1397 "add\t%0, %2, %0" \
|
|
1398 : "=r" ((UDItype)(sh)), \
|
|
1399 "=&r" ((UDItype)(sl)), \
|
|
1400 "+r" (__carry) \
|
|
1401 : "%rJ" ((UDItype)(ah)), \
|
|
1402 "rI" ((UDItype)(bh)), \
|
|
1403 "%rJ" ((UDItype)(al)), \
|
|
1404 "rI" ((UDItype)(bl)) \
|
|
1405 __CLOBBER_CC); \
|
|
1406 } while (0)
|
|
1407
|
|
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction
   (sh, sl) = (ah, al) - (bh, bl).
   The asm sequence is:
     subcc  al, bl -> sl      low difference, sets the condition codes
     sub    ah, bh -> sh      high difference, without borrow
     movcs  %xcc, 1 -> carry  __carry becomes 1 if the low subtract
                              set carry in %xcc
     sub    sh, carry -> sh   propagate it into the high word
   The input constraints deliberately do NOT use the '%' commutative
   modifier: it would allow the compiler to exchange the minuend and
   subtrahend operands, which is only valid for addition.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UDItype __carry = 0;                                                \
    __asm__ ("subcc\t%r5,%6,%1\n\t"                                     \
             "sub\t%r3,%4,%0\n\t"                                       \
             "movcs\t%%xcc, 1, %2\n\t"                                  \
             "sub\t%0, %2, %0"                                          \
             : "=r" ((UDItype)(sh)),                                    \
               "=&r" ((UDItype)(sl)),                                   \
               "+r" (__carry)                                           \
             : "rJ" ((UDItype)(ah)),                                    \
               "rI" ((UDItype)(bh)),                                    \
               "rJ" ((UDItype)(al)),                                    \
               "rI" ((UDItype)(bl))                                     \
             __CLOBBER_CC);                                             \
  } while (0)
|
|
1424
|
|
1425 #define umul_ppmm(wh, wl, u, v) \
|
|
1426 do { \
|
|
1427 UDItype tmp1, tmp2, tmp3, tmp4; \
|
|
1428 __asm__ __volatile__ ( \
|
|
1429 "srl %7,0,%3\n\t" \
|
|
1430 "mulx %3,%6,%1\n\t" \
|
|
1431 "srlx %6,32,%2\n\t" \
|
|
1432 "mulx %2,%3,%4\n\t" \
|
|
1433 "sllx %4,32,%5\n\t" \
|
|
1434 "srl %6,0,%3\n\t" \
|
|
1435 "sub %1,%5,%5\n\t" \
|
|
1436 "srlx %5,32,%5\n\t" \
|
|
1437 "addcc %4,%5,%4\n\t" \
|
|
1438 "srlx %7,32,%5\n\t" \
|
|
1439 "mulx %3,%5,%3\n\t" \
|
|
1440 "mulx %2,%5,%5\n\t" \
|
|
1441 "sethi %%hi(0x80000000),%2\n\t" \
|
|
1442 "addcc %4,%3,%4\n\t" \
|
|
1443 "srlx %4,32,%4\n\t" \
|
|
1444 "add %2,%2,%2\n\t" \
|
|
1445 "movcc %%xcc,%%g0,%2\n\t" \
|
|
1446 "addcc %5,%4,%5\n\t" \
|
|
1447 "sllx %3,32,%3\n\t" \
|
|
1448 "add %1,%3,%1\n\t" \
|
|
1449 "add %5,%2,%0" \
|
|
1450 : "=r" ((UDItype)(wh)), \
|
|
1451 "=&r" ((UDItype)(wl)), \
|
|
1452 "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
|
|
1453 : "r" ((UDItype)(u)), \
|
|
1454 "r" ((UDItype)(v)) \
|
|
1455 __CLOBBER_CC); \
|
|
1456 } while (0)
|
|
1457 #define UMUL_TIME 96
|
|
1458 #define UDIV_TIME 230
|
|
1459 #endif /* sparc64 */
|
|
1460
|
|
1461 #if defined (__vax__) && W_TYPE_SIZE == 32
|
|
1462 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
1463 __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \
|
|
1464 : "=g" ((USItype) (sh)), \
|
|
1465 "=&g" ((USItype) (sl)) \
|
|
1466 : "%0" ((USItype) (ah)), \
|
|
1467 "g" ((USItype) (bh)), \
|
|
1468 "%1" ((USItype) (al)), \
|
|
1469 "g" ((USItype) (bl)))
|
|
1470 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
1471 __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \
|
|
1472 : "=g" ((USItype) (sh)), \
|
|
1473 "=&g" ((USItype) (sl)) \
|
|
1474 : "0" ((USItype) (ah)), \
|
|
1475 "g" ((USItype) (bh)), \
|
|
1476 "1" ((USItype) (al)), \
|
|
1477 "g" ((USItype) (bl)))
|
|
1478 #define umul_ppmm(xh, xl, m0, m1) \
|
|
1479 do { \
|
|
1480 union { \
|
|
1481 UDItype __ll; \
|
|
1482 struct {USItype __l, __h;} __i; \
|
|
1483 } __xx; \
|
|
1484 USItype __m0 = (m0), __m1 = (m1); \
|
|
1485 __asm__ ("emul %1,%2,$0,%0" \
|
|
1486 : "=r" (__xx.__ll) \
|
|
1487 : "g" (__m0), \
|
|
1488 "g" (__m1)); \
|
|
1489 (xh) = __xx.__i.__h; \
|
|
1490 (xl) = __xx.__i.__l; \
|
|
1491 (xh) += ((((SItype) __m0 >> 31) & __m1) \
|
|
1492 + (((SItype) __m1 >> 31) & __m0)); \
|
|
1493 } while (0)
|
|
1494 #define sdiv_qrnnd(q, r, n1, n0, d) \
|
|
1495 do { \
|
|
1496 union {DItype __ll; \
|
|
1497 struct {SItype __l, __h;} __i; \
|
|
1498 } __xx; \
|
|
1499 __xx.__i.__h = n1; __xx.__i.__l = n0; \
|
|
1500 __asm__ ("ediv %3,%2,%0,%1" \
|
|
1501 : "=g" (q), "=g" (r) \
|
|
1502 : "g" (__xx.__ll), "g" (d)); \
|
|
1503 } while (0)
|
|
1504 #endif /* __vax__ */
|
|
1505
|
|
1506 #ifdef _TMS320C6X
|
|
1507 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
1508 do \
|
|
1509 { \
|
|
1510 UDItype __ll; \
|
|
1511 __asm__ ("addu .l1 %1, %2, %0" \
|
|
1512 : "=a" (__ll) : "a" (al), "a" (bl)); \
|
|
1513 (sl) = (USItype)__ll; \
|
|
1514 (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh); \
|
|
1515 } \
|
|
1516 while (0)
|
|
1517
|
|
1518 #ifdef _TMS320C6400_PLUS
|
|
/* __umulsidi3 (u, v): unsigned 32x32 -> 64 bit multiply as a plain C
   expression.  Both arguments are parenthesized before the casts so
   that expression arguments such as `a + b' are converted and
   multiplied as a whole.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
|
|
/* umul_ppmm (w1, w0, u, v): unsigned 32x32 -> 64 bit multiply in plain C.
   The operands are truncated to USItype and multiplied in UDItype;
   W1 receives the upper 32 bits of the product and W0 the lower 32.  */
#define umul_ppmm(w1, w0, u, v) \
  do {                                                                  \
    USItype __c6x_u = (USItype) (u);                                    \
    USItype __c6x_v = (USItype) (v);                                    \
    UDItype __c6x_p = (UDItype) __c6x_u * __c6x_v;                      \
    (w1) = (USItype) (__c6x_p >> 32);                                   \
    (w0) = (USItype) __c6x_p;                                           \
  } while (0)
|
|
1526 #endif /* _TMS320C6400_PLUS */
|
|
1527
|
|
1528 #define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
|
|
1529 #ifdef _TMS320C6400
|
|
1530 #define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
|
|
1531 #endif
|
|
1532 #define UMUL_TIME 4
|
|
1533 #define UDIV_TIME 40
|
|
1534 #endif /* _TMS320C6X */
|
|
1535
|
|
1536 #if defined (__xtensa__) && W_TYPE_SIZE == 32
|
|
1537 /* This code is not Xtensa-configuration-specific, so rely on the compiler
|
|
1538 to expand builtin functions depending on what configuration features
|
|
1539 are available. This avoids library calls when the operation can be
|
|
1540 performed in-line. */
|
|
1541 #define umul_ppmm(w1, w0, u, v) \
|
|
1542 do { \
|
|
1543 DWunion __w; \
|
|
1544 __w.ll = __builtin_umulsidi3 (u, v); \
|
|
1545 w1 = __w.s.high; \
|
|
1546 w0 = __w.s.low; \
|
|
1547 } while (0)
|
|
1548 #define __umulsidi3(u, v) __builtin_umulsidi3 (u, v)
|
|
1549 #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
|
|
1550 #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
|
|
1551 #endif /* __xtensa__ */
|
|
1552
|
|
1553 #if defined xstormy16
|
|
1554 extern UHItype __stormy16_count_leading_zeros (UHItype);
|
|
1555 #define count_leading_zeros(count, x) \
|
|
1556 do \
|
|
1557 { \
|
|
1558 UHItype size; \
|
|
1559 \
|
|
1560 /* We assume that W_TYPE_SIZE is a multiple of 16... */ \
|
|
1561 for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16) \
|
|
1562 { \
|
|
1563 UHItype c; \
|
|
1564 \
|
|
1565 c = __clzhi2 ((x) >> (size - 16)); \
|
|
1566 (count) += c; \
|
|
1567 if (c != 16) \
|
|
1568 break; \
|
|
1569 } \
|
|
1570 } \
|
|
1571 while (0)
|
|
1572 #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
|
|
1573 #endif
|
|
1574
|
|
1575 #if defined (__z8000__) && W_TYPE_SIZE == 16
|
|
1576 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
1577 __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
|
|
1578 : "=r" ((unsigned int)(sh)), \
|
|
1579 "=&r" ((unsigned int)(sl)) \
|
|
1580 : "%0" ((unsigned int)(ah)), \
|
|
1581 "r" ((unsigned int)(bh)), \
|
|
1582 "%1" ((unsigned int)(al)), \
|
|
1583 "rQR" ((unsigned int)(bl)))
|
|
1584 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
1585 __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
|
|
1586 : "=r" ((unsigned int)(sh)), \
|
|
1587 "=&r" ((unsigned int)(sl)) \
|
|
1588 : "0" ((unsigned int)(ah)), \
|
|
1589 "r" ((unsigned int)(bh)), \
|
|
1590 "1" ((unsigned int)(al)), \
|
|
1591 "rQR" ((unsigned int)(bl)))
|
|
1592 #define umul_ppmm(xh, xl, m0, m1) \
|
|
1593 do { \
|
|
1594 union {long int __ll; \
|
|
1595 struct {unsigned int __h, __l;} __i; \
|
|
1596 } __xx; \
|
|
1597 unsigned int __m0 = (m0), __m1 = (m1); \
|
|
1598 __asm__ ("mult %S0,%H3" \
|
|
1599 : "=r" (__xx.__i.__h), \
|
|
1600 "=r" (__xx.__i.__l) \
|
|
1601 : "%1" (__m0), \
|
|
1602 "rQR" (__m1)); \
|
|
1603 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
|
|
1604 (xh) += ((((signed int) __m0 >> 15) & __m1) \
|
|
1605 + (((signed int) __m1 >> 15) & __m0)); \
|
|
1606 } while (0)
|
|
1607 #endif /* __z8000__ */
|
|
1608
|
|
1609 #endif /* __GNUC__ */
|
|
1610
|
|
1611 /* If this machine has no inline assembler, use C macros. */
|
|
1612
|
|
#if !defined (add_ssaaaa)
/* add_ssaaaa (sh, sl, ah, al, bh, bl): portable two-word addition,
   (sh, sl) = (ah, al) + (bh, bl).
   The low words are added in UWtype; a wrapped sum (sum < al) means a
   carry, which is added into the high word.  SL is stored last, after
   SH has been computed.
   AL and BL are copied into locals so that every argument is evaluated
   exactly once, even if it has side effects.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __add_al = (al);                                             \
    UWtype __add_bl = (bl);                                             \
    UWtype __x = __add_al + __add_bl;                                   \
    (sh) = (ah) + (bh) + (__x < __add_al);                              \
    (sl) = __x;                                                         \
  } while (0)
#endif
|
|
1622
|
|
#if !defined (sub_ddmmss)
/* sub_ddmmss (sh, sl, ah, al, bh, bl): portable two-word subtraction,
   (sh, sl) = (ah, al) - (bh, bl).
   The low words are subtracted in UWtype; a borrow occurs exactly when
   al < bl, and is subtracted from the high word.  SL is stored last,
   after SH has been computed.
   AL and BL are copied into locals so that every argument is evaluated
   exactly once, even if it has side effects.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __sub_al = (al);                                             \
    UWtype __sub_bl = (bl);                                             \
    UWtype __x = __sub_al - __sub_bl;                                   \
    (sh) = (ah) - (bh) - (__sub_al < __sub_bl);                         \
    (sl) = __x;                                                         \
  } while (0)
#endif
|
|
1632
|
|
1633 /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
|
|
1634 smul_ppmm. */
|
|
1635 #if !defined (umul_ppmm) && defined (smul_ppmm)
|
|
1636 #define umul_ppmm(w1, w0, u, v) \
|
|
1637 do { \
|
|
1638 UWtype __w1; \
|
|
1639 UWtype __xm0 = (u), __xm1 = (v); \
|
|
1640 smul_ppmm (__w1, w0, __xm0, __xm1); \
|
|
1641 (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
|
|
1642 + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
|
|
1643 } while (0)
|
|
1644 #endif
|
|
1645
|
|
1646 /* If we still don't have umul_ppmm, define it using plain C. */
|
|
#if !defined (umul_ppmm)
/* umul_ppmm (w1, w0, u, v): portable full-width unsigned multiply.
   U and V are split into half words, the four half-word products are
   formed in UWtype, and the cross terms are folded together with
   explicit carry handling.  W1 receives the high word of U*V and W0
   the low word.
   U and V are copied into locals first so that each argument is
   evaluated exactly once (each is needed for both its low and its high
   part).  */
#define umul_ppmm(w1, w0, u, v) \
  do {                                                                  \
    UWtype __umul_u = (u), __umul_v = (v);                              \
    UWtype __x0, __x1, __x2, __x3;                                      \
    UHWtype __ul, __vl, __uh, __vh;                                     \
                                                                        \
    __ul = __ll_lowpart (__umul_u);                                     \
    __uh = __ll_highpart (__umul_u);                                    \
    __vl = __ll_lowpart (__umul_v);                                     \
    __vh = __ll_highpart (__umul_v);                                    \
                                                                        \
    __x0 = (UWtype) __ul * __vl;        /* low  * low  */               \
    __x1 = (UWtype) __ul * __vh;        /* low  * high */               \
    __x2 = (UWtype) __uh * __vl;        /* high * low  */               \
    __x3 = (UWtype) __uh * __vh;        /* high * high */               \
                                                                        \
    __x1 += __ll_highpart (__x0);/* this can't give carry */            \
    __x1 += __x2;               /* but this indeed can */               \
    if (__x1 < __x2)            /* did we get it? */                    \
      __x3 += __ll_B;           /* yes, add it in the proper pos.  */   \
                                                                        \
    (w1) = __x3 + __ll_highpart (__x1);                                 \
    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);          \
  } while (0)
#endif
|
|
1672
|
|
1673 #if !defined (__umulsidi3)
|
|
1674 #define __umulsidi3(u, v) \
|
|
1675 ({DWunion __w; \
|
|
1676 umul_ppmm (__w.s.high, __w.s.low, u, v); \
|
|
1677 __w.ll; })
|
|
1678 #endif
|
|
1679
|
|
/* __udiv_qrnnd_c (q, r, n1, n0, d): division written in plain C, working
   one half word at a time.
   D is split into its high and low halves (__d1, __d0).  The quotient is
   produced as two half-word digits: __q1 from n1 and the high half of
   n0, then __q0 from the remaining partial remainder and the low half
   of n0.  Each digit is first estimated by dividing by __d1 alone and
   then corrected downwards (at most twice) when the estimate times __d0
   exceeds the partial remainder.  Q receives __q1 * __ll_B | __q0 and R
   receives the final partial remainder.
   N1, N0 and D are expanded more than once, so they should not have
   side effects.
   NOTE(review): __d1 is used as a divisor, so D's high half must be
   nonzero; presumably D must be normalized and N1 < D (the file defines
   UDIV_NEEDS_NORMALIZATION as 1 when this macro is selected as
   udiv_qrnnd) -- confirm against the file's header documentation.  */
1680 /* Define this unconditionally, so it can be used for debugging. */
|
|
1681 #define __udiv_qrnnd_c(q, r, n1, n0, d) \
|
|
1682 do { \
|
|
1683 UWtype __d1, __d0, __q1, __q0; \
|
|
1684 UWtype __r1, __r0, __m; \
|
|
1685 __d1 = __ll_highpart (d); \
|
|
1686 __d0 = __ll_lowpart (d); \
|
|
1687 \
|
|
1688 __r1 = (n1) % __d1; /* first digit: estimate using __d1 only */ \
|
|
1689 __q1 = (n1) / __d1; \
|
|
1690 __m = (UWtype) __q1 * __d0; /* part of q1*d not yet accounted for */ \
|
|
1691 __r1 = __r1 * __ll_B | __ll_highpart (n0); /* bring in next half word */ \
|
|
1692 if (__r1 < __m) /* estimate too large: step it down */ \
|
|
1693 { \
|
|
1694 __q1--, __r1 += (d); \
|
|
1695 if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
|
|
1696 if (__r1 < __m) \
|
|
1697 __q1--, __r1 += (d); \
|
|
1698 } \
|
|
1699 __r1 -= __m; \
|
|
1700 \
|
|
1701 __r0 = __r1 % __d1; /* second digit: same steps on the remainder */ \
|
|
1702 __q0 = __r1 / __d1; \
|
|
1703 __m = (UWtype) __q0 * __d0; \
|
|
1704 __r0 = __r0 * __ll_B | __ll_lowpart (n0); \
|
|
1705 if (__r0 < __m) \
|
|
1706 { \
|
|
1707 __q0--, __r0 += (d); \
|
|
1708 if (__r0 >= (d)) /* no carry out of the addition above */ \
|
|
1709 if (__r0 < __m) \
|
|
1710 __q0--, __r0 += (d); \
|
|
1711 } \
|
|
1712 __r0 -= __m; \
|
|
1713 \
|
|
1714 (q) = (UWtype) __q1 * __ll_B | __q0; /* combine the two digits */ \
|
|
1715 (r) = __r0; \
|
|
1716 } while (0)
|
|
1717
|
|
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).

   The helper's return value is stored in Q.  It also writes a UWtype
   through its first argument; a local temporary receives that value and
   is then copied to R, so R itself does not have to be an addressable
   UWtype object.  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do { \
    extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype); \
    UWtype __r; \
    (q) = __udiv_w_sdiv (&__r, nh, nl, d); \
    (r) = __r; \
  } while (0)
#endif
|
|
1729
|
|
/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.
   UDIV_NEEDS_NORMALIZATION is set to 1 together with this fallback.  */
#if !defined (udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif
|
|
1735
|
|
/* Generic count_leading_zeros (count, x), used when nothing earlier
   supplied a definition.  X is converted to UWtype and read once; the
   result is stored in COUNT.

   The macro first picks a shift __a that brings the highest non-zero
   chunk of X down to the bottom of the word (chunks are __BITS4 bits wide
   when W_TYPE_SIZE <= 32, and 8 bits wide otherwise), then finishes with
   a lookup in __clz_tab.  NOTE(review): __clz_tab and __BITS4 are defined
   elsewhere; the table presumably maps a small value to the position of
   its highest set bit -- confirm against its definition.

   COUNT_LEADING_ZEROS_0 is defined as W_TYPE_SIZE alongside this
   version.  */
#if !defined (count_leading_zeros)
#define count_leading_zeros(count, x) \
  do { \
    UWtype __xr = (x); \
    UWtype __a; \
    \
    if (W_TYPE_SIZE <= 32) \
      { \
        /* Two comparisons select one of the four chunks.  */ \
        __a = __xr < ((UWtype) 1 << (2 * __BITS4)) \
          ? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \
          : (__xr < ((UWtype) 1 << (3 * __BITS4)) ? 2 * __BITS4 : 3 * __BITS4); \
      } \
    else \
      { \
        /* Scan down from the top byte to the first non-zero one.  */ \
        for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
          if (((__xr >> __a) & 0xff) != 0) \
            break; \
      } \
    \
    (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
  } while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
|
|
1759
|
|
#if !defined (count_trailing_zeros)
/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
   defined in asm, but if it is not, the C version above is good enough.

   X is converted to UWtype and read once.  `__ctz_x & -__ctz_x' keeps only
   the lowest set bit of X; the number of leading zeros of that value is
   then subtracted from W_TYPE_SIZE - 1 and stored in COUNT.  */
#define count_trailing_zeros(count, x) \
  do { \
    UWtype __ctz_x = (x); \
    UWtype __ctz_c; \
    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \
    (count) = W_TYPE_SIZE - 1 - __ctz_c; \
  } while (0)
#endif
|
|
1771
|
|
/* Default UDIV_NEEDS_NORMALIZATION to 0 when no earlier definition has
   set it.  */
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif
|