111
|
1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
|
131
|
2 Copyright (C) 1991-2018 Free Software Foundation, Inc.
|
111
|
3
|
|
4 This file is part of the GNU C Library.
|
|
5
|
|
6 The GNU C Library is free software; you can redistribute it and/or
|
|
7 modify it under the terms of the GNU Lesser General Public
|
|
8 License as published by the Free Software Foundation; either
|
|
9 version 2.1 of the License, or (at your option) any later version.
|
|
10
|
|
11 In addition to the permissions in the GNU Lesser General Public
|
|
12 License, the Free Software Foundation gives you unlimited
|
|
13 permission to link the compiled version of this file into
|
|
14 combinations with other programs, and to distribute those
|
|
15 combinations without any restriction coming from the use of this
|
|
16 file. (The Lesser General Public License restrictions do apply in
|
|
17 other respects; for example, they cover modification of the file,
|
|
18 and distribution when not linked into a combine executable.)
|
|
19
|
|
20 The GNU C Library is distributed in the hope that it will be useful,
|
|
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
23 Lesser General Public License for more details.
|
|
24
|
|
25 You should have received a copy of the GNU Lesser General Public
|
|
26 License along with the GNU C Library; if not, see
|
|
27 <http://www.gnu.org/licenses/>. */
|
|
28
|
|
29 /* You have to define the following before including this file:
|
|
30
|
|
31 UWtype -- An unsigned type, default type for operations (typically a "word")
|
|
32 UHWtype -- An unsigned type, at least half the size of UWtype.
|
|
33 UDWtype -- An unsigned type, at least twice as large as UWtype
|
|
34 W_TYPE_SIZE -- size in bits of UWtype
|
|
35
|
|
36 UQItype -- Unsigned 8 bit type.
|
|
37 SItype, USItype -- Signed and unsigned 32 bit types.
|
|
38 DItype, UDItype -- Signed and unsigned 64 bit types.
|
|
39
|
|
40 On a 32 bit machine UWtype should typically be USItype;
|
|
41 on a 64 bit machine, UWtype should typically be UDItype. */
|
|
42
|
|
/* When the includer has not configured the word types, fall back to a
   32-bit word built from USItype/UDItype.  */
#if !defined (W_TYPE_SIZE)
# define W_TYPE_SIZE 32
# define UWtype USItype
# define UHWtype USItype
# define UDWtype UDItype
#endif

/* A quarter of the word size, in bits.  */
#define __BITS4 (W_TYPE_SIZE / 4)
/* The value 2 ** (W_TYPE_SIZE / 2) as a UWtype, i.e. the radix of a
   half word.  */
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
/* Lower W_TYPE_SIZE / 2 bits of WORD, after conversion to UWtype.  */
#define __ll_lowpart(word) ((UWtype) (word) & (__ll_B - 1))
/* Upper W_TYPE_SIZE / 2 bits of WORD, shifted down to bit 0.  */
#define __ll_highpart(word) ((UWtype) (word) >> (W_TYPE_SIZE / 2))
|
|
54
|
|
55 /* Used in glibc only. */
|
|
56 #ifndef attribute_hidden
|
|
57 #define attribute_hidden
|
|
58 #endif
|
|
59
|
|
60 extern const UQItype __clz_tab[256] attribute_hidden;
|
|
61
|
|
62 /* Define auxiliary asm macros.
|
|
63
|
|
64 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
|
|
65 UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
|
|
66 word product in HIGH_PROD and LOW_PROD.
|
|
67
|
|
68 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
|
|
69 UDWtype product. This is just a variant of umul_ppmm.
|
|
70
|
|
71 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
|
|
72 denominator) divides a UDWtype, composed by the UWtype integers
|
|
73 HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
|
|
74 in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
|
|
75 than DENOMINATOR for correct operation. If, in addition, the most
|
|
76 significant bit of DENOMINATOR must be 1, then the pre-processor symbol
|
|
77 UDIV_NEEDS_NORMALIZATION is defined to 1.
|
|
78
|
|
79 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
|
|
80 denominator). Like udiv_qrnnd but the numbers are signed. The quotient
|
|
81 is rounded towards 0.
|
|
82
|
|
83 5) count_leading_zeros(count, x) counts the number of zero-bits from the
|
|
84 msb to the first nonzero bit in the UWtype X. This is the number of
|
|
85 steps X needs to be shifted left to set the msb. Undefined for X == 0,
|
|
86 unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
|
|
87
|
|
88 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
|
|
89 from the least significant end.
|
|
90
|
|
91 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
|
|
92 high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
|
|
93 HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
|
|
94 respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
|
|
95 (i.e. carry out) is not stored anywhere, and is lost.
|
|
96
|
|
97 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
|
|
98 high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
|
|
99 composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
|
|
100 LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE
|
|
101 and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
|
|
102 and is lost.
|
|
103
|
|
104 If any of these macros are left undefined for a particular CPU,
|
|
105 C macros are used. */
|
|
106
|
|
107 /* The CPUs come in alphabetical order below.
|
|
108
|
|
109 Please add support for more CPUs here, or improve the current support
|
|
110 for the CPUs below!
|
|
111 (E.g. WE32100, IBM360.) */
|
|
112
|
|
113 #if defined (__GNUC__) && !defined (NO_ASM)
|
|
114
|
|
/* gcc 2 and later sometimes need "cc" in an asm clobber list, but gcc 1
   does not understand it.  These two macros hide the difference so the
   asm statements do not have to be written twice:
     __CLOBBER_CC      starts a clobber list holding only "cc";
     __AND_CLOBBER_CC  appends "cc" to an existing clobber list.  */
#if __GNUC__ >= 2
# define __CLOBBER_CC : "cc"
# define __AND_CLOBBER_CC , "cc"
#else /* __GNUC__ < 2 */
# define __CLOBBER_CC
# define __AND_CLOBBER_CC
#endif /* __GNUC__ >= 2 */
|
|
124
|
|
#if defined (__aarch64__)

/* Use the compiler builtins: the plain (int) flavour for a 32-bit word,
   the long long flavour for a 64-bit word.  COUNT_LEADING_ZEROS_0 is
   set to the word size in both cases.  */
# if W_TYPE_SIZE == 32
#  define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
#  define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
#  define COUNT_LEADING_ZEROS_0 32
# elif W_TYPE_SIZE == 64
#  define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clzll (X))
#  define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctzll (X))
#  define COUNT_LEADING_ZEROS_0 64
# endif /* W_TYPE_SIZE */

#endif /* __aarch64__ */
|
|
140
|
|
141 #if defined (__alpha) && W_TYPE_SIZE == 64
|
|
142 /* There is a bug in g++ before version 5 that
|
|
143 errors on __builtin_alpha_umulh. */
|
|
144 #if !defined(__cplusplus) || __GNUC__ >= 5
|
|
145 #define umul_ppmm(ph, pl, m0, m1) \
|
|
146 do { \
|
|
147 UDItype __m0 = (m0), __m1 = (m1); \
|
|
148 (ph) = __builtin_alpha_umulh (__m0, __m1); \
|
|
149 (pl) = __m0 * __m1; \
|
|
150 } while (0)
|
|
151 #define UMUL_TIME 46
|
|
152 #endif /* !c++ */
|
|
153 #ifndef LONGLONG_STANDALONE
|
|
154 #define udiv_qrnnd(q, r, n1, n0, d) \
|
|
155 do { UDItype __r; \
|
|
156 (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
|
|
157 (r) = __r; \
|
|
158 } while (0)
|
|
159 extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
|
|
160 #define UDIV_TIME 220
|
|
161 #endif /* LONGLONG_STANDALONE */
|
|
162 #ifdef __alpha_cix__
|
|
163 #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X))
|
|
164 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
|
|
165 #define COUNT_LEADING_ZEROS_0 64
|
|
166 #else
|
|
167 #define count_leading_zeros(COUNT,X) \
|
|
168 do { \
|
|
169 UDItype __xr = (X), __t, __a; \
|
|
170 __t = __builtin_alpha_cmpbge (0, __xr); \
|
|
171 __a = __clz_tab[__t ^ 0xff] - 1; \
|
|
172 __t = __builtin_alpha_extbl (__xr, __a); \
|
|
173 (COUNT) = 64 - (__clz_tab[__t] + __a*8); \
|
|
174 } while (0)
|
|
175 #define count_trailing_zeros(COUNT,X) \
|
|
176 do { \
|
|
177 UDItype __xr = (X), __t, __a; \
|
|
178 __t = __builtin_alpha_cmpbge (0, __xr); \
|
|
179 __t = ~__t & -~__t; \
|
|
180 __a = ((__t & 0xCC) != 0) * 2; \
|
|
181 __a += ((__t & 0xF0) != 0) * 4; \
|
|
182 __a += ((__t & 0xAA) != 0); \
|
|
183 __t = __builtin_alpha_extbl (__xr, __a); \
|
|
184 __a <<= 3; \
|
|
185 __t &= -__t; \
|
|
186 __a += ((__t & 0xCC) != 0) * 2; \
|
|
187 __a += ((__t & 0xF0) != 0) * 4; \
|
|
188 __a += ((__t & 0xAA) != 0); \
|
|
189 (COUNT) = __a; \
|
|
190 } while (0)
|
|
191 #endif /* __alpha_cix__ */
|
|
192 #endif /* __alpha */
|
|
193
|
|
194 #if defined (__arc__) && W_TYPE_SIZE == 32
|
|
/* add_ssaaaa (sh, sl, ah, al, bh, bl): two-word addition,
   (sh,sl) = (ah,al) + (bh,bl); the carry out of the high word is lost.
   The low words are added with add.f and the high words with adc, so
   the sequence overwrites the status flags.  They are therefore listed
   as clobbered, as the norm.f-based count_leading_zeros for this CPU
   already does.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add.f %1, %4, %5\n\tadc %0, %2, %3"                       \
           : "=r" ((USItype) (sh)),                                   \
             "=&r" ((USItype) (sl))                                   \
           : "%r" ((USItype) (ah)),                                   \
             "rICal" ((USItype) (bh)),                                \
             "%r" ((USItype) (al)),                                   \
             "rICal" ((USItype) (bl))                                 \
           : "cc")
|
|
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction,
   (sh,sl) = (ah,al) - (bh,bl); the borrow out of the high word is lost.
   The low words are subtracted with sub.f and the high words with sbc,
   so the sequence overwrites the status flags.  They are therefore
   listed as clobbered, as the norm.f-based count_leading_zeros for this
   CPU already does.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub.f %1, %4, %5\n\tsbc %0, %2, %3"                       \
           : "=r" ((USItype) (sh)),                                   \
             "=&r" ((USItype) (sl))                                   \
           : "r" ((USItype) (ah)),                                    \
             "rICal" ((USItype) (bh)),                                \
             "r" ((USItype) (al)),                                    \
             "rICal" ((USItype) (bl))                                 \
           : "cc")
|
|
211
|
|
/* __umulsidi3 (u, v): widening unsigned multiply of two USItype values,
   yielding a UDItype product.  Both arguments are parenthesized so
   that a compound expression such as `a + b' is converted and
   multiplied as a whole rather than being split by operator
   precedence.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
|
|
#ifdef __ARC_NORM__
/* count_leading_zeros (count, x): run norm.f on X, let the conditional
   mov.mi replace the result by -1, then store that value plus one in
   COUNT.  NOTE(review): presumably mov.mi fires when X has its top bit
   set, which would make COUNT 0 in that case -- confirm against the ARC
   instruction set reference.  */
#define count_leading_zeros(count, x) \
  do                                                            \
    {                                                           \
      SItype __lz;                                              \
                                                                \
      __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1"                 \
               : "=r" (__lz)                                    \
               : "r" (x)                                        \
               : "cc");                                         \
      (count) = 1 + __lz;                                       \
    }                                                           \
  while (0)
#define COUNT_LEADING_ZEROS_0 32
#endif /* __ARC_NORM__ */
|
|
225 #endif /* __arc__ */
|
|
226
|
|
227 #if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
|
|
228 && W_TYPE_SIZE == 32
|
|
229 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
230 __asm__ ("adds %1, %4, %5\n\tadc %0, %2, %3" \
|
|
231 : "=r" ((USItype) (sh)), \
|
|
232 "=&r" ((USItype) (sl)) \
|
|
233 : "%r" ((USItype) (ah)), \
|
|
234 "rI" ((USItype) (bh)), \
|
|
235 "%r" ((USItype) (al)), \
|
|
236 "rI" ((USItype) (bl)) __CLOBBER_CC)
|
|
237 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
238 __asm__ ("subs %1, %4, %5\n\tsbc %0, %2, %3" \
|
|
239 : "=r" ((USItype) (sh)), \
|
|
240 "=&r" ((USItype) (sl)) \
|
|
241 : "r" ((USItype) (ah)), \
|
|
242 "rI" ((USItype) (bh)), \
|
|
243 "r" ((USItype) (al)), \
|
|
244 "rI" ((USItype) (bl)) __CLOBBER_CC)
|
|
245 # if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
|
|
246 || defined(__ARM_ARCH_3__)
|
|
247 # define umul_ppmm(xh, xl, a, b) \
|
|
248 do { \
|
|
249 register USItype __t0, __t1, __t2; \
|
|
250 __asm__ ("%@ Inlined umul_ppmm\n" \
|
|
251 " mov %2, %5, lsr #16\n" \
|
|
252 " mov %0, %6, lsr #16\n" \
|
|
253 " bic %3, %5, %2, lsl #16\n" \
|
|
254 " bic %4, %6, %0, lsl #16\n" \
|
|
255 " mul %1, %3, %4\n" \
|
|
256 " mul %4, %2, %4\n" \
|
|
257 " mul %3, %0, %3\n" \
|
|
258 " mul %0, %2, %0\n" \
|
|
259 " adds %3, %4, %3\n" \
|
|
260 " addcs %0, %0, #65536\n" \
|
|
261 " adds %1, %1, %3, lsl #16\n" \
|
|
262 " adc %0, %0, %3, lsr #16" \
|
|
263 : "=&r" ((USItype) (xh)), \
|
|
264 "=r" ((USItype) (xl)), \
|
|
265 "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \
|
|
266 : "r" ((USItype) (a)), \
|
|
267 "r" ((USItype) (b)) __CLOBBER_CC ); \
|
|
268 } while (0)
|
|
269 # define UMUL_TIME 20
|
|
270 # else
|
|
/* umul_ppmm (xh, xl, a, b): 32 x 32 -> 64 bit unsigned multiply written
   in plain C, leaving instruction selection (umull) to the compiler.
   XL receives the low word of the product and XH the high word.  */
# define umul_ppmm(xh, xl, a, b) \
  do {                                                                  \
    register UDItype __prod = (UDItype)(USItype)(a) * (USItype)(b);     \
    (xl) = (USItype)__prod;                                             \
    (xh) = (USItype)(__prod >> 32);                                     \
  } while (0)
|
|
278 # define UMUL_TIME 3
|
|
279 # endif
|
|
280 # define UDIV_TIME 100
|
|
281 #endif /* __arm__ */
|
|
282
|
|
283 #if defined(__arm__)
|
|
284 /* Let gcc decide how best to implement count_leading_zeros. */
|
|
285 #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
|
|
286 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
|
|
287 #define COUNT_LEADING_ZEROS_0 32
|
|
288 #endif
|
|
289
|
|
#if defined (__AVR__)

/* Map count_leading_zeros / count_trailing_zeros onto the compiler
   builtins: the plain flavour for a 16-bit word, the `l' flavour for a
   32-bit word and the `ll' flavour for a 64-bit word.
   COUNT_LEADING_ZEROS_0 is set to the word size in each case.  */
# if W_TYPE_SIZE == 16
#  define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
#  define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
#  define COUNT_LEADING_ZEROS_0 16
# elif W_TYPE_SIZE == 32
#  define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X))
#  define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
#  define COUNT_LEADING_ZEROS_0 32
# elif W_TYPE_SIZE == 64
#  define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzll (X))
#  define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
#  define COUNT_LEADING_ZEROS_0 64
# endif /* W_TYPE_SIZE */

#endif /* defined (__AVR__) */
|
|
311
|
|
#if defined (__CRIS__)

/* The bit-count builtins are enabled per architecture version:
   count_leading_zeros from version 3, count_trailing_zeros from
   version 8.  */
# if __CRIS_arch_version >= 3
#  define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
#  define COUNT_LEADING_ZEROS_0 32
# endif /* __CRIS_arch_version >= 3 */

# if __CRIS_arch_version >= 8
#  define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
# endif /* __CRIS_arch_version >= 8 */

/* From version 10 on the widening multiply is an inline C expression;
   earlier versions call the external function __umulsidi3.  */
# if __CRIS_arch_version >= 10
#  define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v))
# else
#  define __umulsidi3 __umulsidi3
extern UDItype __umulsidi3 (USItype, USItype);
# endif /* __CRIS_arch_version >= 10 */

/* umul_ppmm (w1, w0, u, v): form the double-word product with
   __umulsidi3, then store its low word in W0 and its high word in W1.  */
# define umul_ppmm(w1, w0, u, v) \
  do {                                          \
    UDItype __prod = __umulsidi3 (u, v);        \
    (w0) = (USItype) (__prod);                  \
    (w1) = (USItype) (__prod >> 32);            \
  } while (0)

/* FIXME: defining add_ssaaaa and sub_ddmmss should be advantageous for
   DFmode ("double" intrinsics, avoiding two of the three insns handling
   carry), but defining them as open-code C composing and doing the
   operation in DImode (UDImode) shows that the DImode needs work:
   register pressure from requiring neighboring registers and the
   traffic to and from them come to dominate, in the 4.7 series.  */

#endif /* defined (__CRIS__) */
|
|
345
|
|
346 #if defined (__hppa) && W_TYPE_SIZE == 32
|
|
347 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
348 __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \
|
|
349 : "=r" ((USItype) (sh)), \
|
|
350 "=&r" ((USItype) (sl)) \
|
|
351 : "%rM" ((USItype) (ah)), \
|
|
352 "rM" ((USItype) (bh)), \
|
|
353 "%rM" ((USItype) (al)), \
|
|
354 "rM" ((USItype) (bl)))
|
|
355 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
356 __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \
|
|
357 : "=r" ((USItype) (sh)), \
|
|
358 "=&r" ((USItype) (sl)) \
|
|
359 : "rM" ((USItype) (ah)), \
|
|
360 "rM" ((USItype) (bh)), \
|
|
361 "rM" ((USItype) (al)), \
|
|
362 "rM" ((USItype) (bl)))
|
|
363 #if defined (_PA_RISC1_1)
|
|
364 #define umul_ppmm(w1, w0, u, v) \
|
|
365 do { \
|
|
366 union \
|
|
367 { \
|
|
368 UDItype __f; \
|
|
369 struct {USItype __w1, __w0;} __w1w0; \
|
|
370 } __t; \
|
|
371 __asm__ ("xmpyu %1,%2,%0" \
|
|
372 : "=x" (__t.__f) \
|
|
373 : "x" ((USItype) (u)), \
|
|
374 "x" ((USItype) (v))); \
|
|
375 (w1) = __t.__w1w0.__w1; \
|
|
376 (w0) = __t.__w1w0.__w0; \
|
|
377 } while (0)
|
|
378 #define UMUL_TIME 8
|
|
379 #else
|
|
380 #define UMUL_TIME 30
|
|
381 #endif
|
|
382 #define UDIV_TIME 40
|
|
383 #define count_leading_zeros(count, x) \
|
|
384 do { \
|
|
385 USItype __tmp; \
|
|
386 __asm__ ( \
|
|
387 "ldi 1,%0\n" \
|
|
388 " extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \
|
|
389 " extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n"\
|
|
390 " ldo 16(%0),%0 ; Yes. Perform add.\n" \
|
|
391 " extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \
|
|
392 " extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n"\
|
|
393 " ldo 8(%0),%0 ; Yes. Perform add.\n" \
|
|
394 " extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \
|
|
395 " extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n"\
|
|
396 " ldo 4(%0),%0 ; Yes. Perform add.\n" \
|
|
397 " extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \
|
|
398 " extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n"\
|
|
399 " ldo 2(%0),%0 ; Yes. Perform add.\n" \
|
|
400 " extru %1,30,1,%1 ; Extract bit 1.\n" \
|
|
401 " sub %0,%1,%0 ; Subtract it.\n" \
|
|
402 : "=r" (count), "=r" (__tmp) : "1" (x)); \
|
|
403 } while (0)
|
|
404 #endif
|
|
405
|
|
406 #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
|
|
407 #if !defined (__zarch__)
|
|
408 #define smul_ppmm(xh, xl, m0, m1) \
|
|
409 do { \
|
|
410 union {DItype __ll; \
|
|
411 struct {USItype __h, __l;} __i; \
|
|
412 } __x; \
|
|
413 __asm__ ("lr %N0,%1\n\tmr %0,%2" \
|
|
414 : "=&r" (__x.__ll) \
|
|
415 : "r" (m0), "r" (m1)); \
|
|
416 (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
|
|
417 } while (0)
|
|
418 #define sdiv_qrnnd(q, r, n1, n0, d) \
|
|
419 do { \
|
|
420 union {DItype __ll; \
|
|
421 struct {USItype __h, __l;} __i; \
|
|
422 } __x; \
|
|
423 __x.__i.__h = n1; __x.__i.__l = n0; \
|
|
424 __asm__ ("dr %0,%2" \
|
|
425 : "=r" (__x.__ll) \
|
|
426 : "0" (__x.__ll), "r" (d)); \
|
|
427 (q) = __x.__i.__l; (r) = __x.__i.__h; \
|
|
428 } while (0)
|
|
429 #else
|
|
430 #define smul_ppmm(xh, xl, m0, m1) \
|
|
431 do { \
|
|
432 register SItype __r0 __asm__ ("0"); \
|
|
433 register SItype __r1 __asm__ ("1") = (m0); \
|
|
434 \
|
|
435 __asm__ ("mr\t%%r0,%3" \
|
|
436 : "=r" (__r0), "=r" (__r1) \
|
|
437 : "r" (__r1), "r" (m1)); \
|
|
438 (xh) = __r0; (xl) = __r1; \
|
|
439 } while (0)
|
|
440
|
|
441 #define sdiv_qrnnd(q, r, n1, n0, d) \
|
|
442 do { \
|
|
443 register SItype __r0 __asm__ ("0") = (n1); \
|
|
444 register SItype __r1 __asm__ ("1") = (n0); \
|
|
445 \
|
|
446 __asm__ ("dr\t%%r0,%4" \
|
|
447 : "=r" (__r0), "=r" (__r1) \
|
|
448 : "r" (__r0), "r" (__r1), "r" (d)); \
|
|
449 (q) = __r1; (r) = __r0; \
|
|
450 } while (0)
|
|
451 #endif /* __zarch__ */
|
|
452 #endif
|
|
453
|
|
454 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
|
|
455 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
456 __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}" \
|
|
457 : "=r" ((USItype) (sh)), \
|
|
458 "=&r" ((USItype) (sl)) \
|
|
459 : "%0" ((USItype) (ah)), \
|
|
460 "g" ((USItype) (bh)), \
|
|
461 "%1" ((USItype) (al)), \
|
|
462 "g" ((USItype) (bl)))
|
|
463 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
464 __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}" \
|
|
465 : "=r" ((USItype) (sh)), \
|
|
466 "=&r" ((USItype) (sl)) \
|
|
467 : "0" ((USItype) (ah)), \
|
|
468 "g" ((USItype) (bh)), \
|
|
469 "1" ((USItype) (al)), \
|
|
470 "g" ((USItype) (bl)))
|
|
471 #define umul_ppmm(w1, w0, u, v) \
|
|
472 __asm__ ("mul{l} %3" \
|
|
473 : "=a" ((USItype) (w0)), \
|
|
474 "=d" ((USItype) (w1)) \
|
|
475 : "%0" ((USItype) (u)), \
|
|
476 "rm" ((USItype) (v)))
|
|
477 #define udiv_qrnnd(q, r, n1, n0, dv) \
|
|
478 __asm__ ("div{l} %4" \
|
|
479 : "=a" ((USItype) (q)), \
|
|
480 "=d" ((USItype) (r)) \
|
|
481 : "0" ((USItype) (n0)), \
|
|
482 "1" ((USItype) (n1)), \
|
|
483 "rm" ((USItype) (dv)))
|
|
484 #define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
|
|
485 #define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
|
|
486 #define UMUL_TIME 40
|
|
487 #define UDIV_TIME 40
|
|
488 #endif /* 80x86 */
|
|
489
|
|
490 #if defined (__x86_64__) && W_TYPE_SIZE == 64
|
|
491 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
492 __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}" \
|
|
493 : "=r" ((UDItype) (sh)), \
|
|
494 "=&r" ((UDItype) (sl)) \
|
|
495 : "%0" ((UDItype) (ah)), \
|
|
496 "rme" ((UDItype) (bh)), \
|
|
497 "%1" ((UDItype) (al)), \
|
|
498 "rme" ((UDItype) (bl)))
|
|
499 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
500 __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}" \
|
|
501 : "=r" ((UDItype) (sh)), \
|
|
502 "=&r" ((UDItype) (sl)) \
|
|
503 : "0" ((UDItype) (ah)), \
|
|
504 "rme" ((UDItype) (bh)), \
|
|
505 "1" ((UDItype) (al)), \
|
|
506 "rme" ((UDItype) (bl)))
|
|
507 #define umul_ppmm(w1, w0, u, v) \
|
|
508 __asm__ ("mul{q} %3" \
|
|
509 : "=a" ((UDItype) (w0)), \
|
|
510 "=d" ((UDItype) (w1)) \
|
|
511 : "%0" ((UDItype) (u)), \
|
|
512 "rm" ((UDItype) (v)))
|
|
513 #define udiv_qrnnd(q, r, n1, n0, dv) \
|
|
514 __asm__ ("div{q} %4" \
|
|
515 : "=a" ((UDItype) (q)), \
|
|
516 "=d" ((UDItype) (r)) \
|
|
517 : "0" ((UDItype) (n0)), \
|
|
518 "1" ((UDItype) (n1)), \
|
|
519 "rm" ((UDItype) (dv)))
|
|
520 #define count_leading_zeros(count, x) ((count) = __builtin_clzll (x))
|
|
521 #define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x))
|
|
522 #define UMUL_TIME 40
|
|
523 #define UDIV_TIME 40
|
|
524 #endif /* x86_64 */
|
|
525
|
|
526 #if defined (__i960__) && W_TYPE_SIZE == 32
|
|
527 #define umul_ppmm(w1, w0, u, v) \
|
|
528 ({union {UDItype __ll; \
|
|
529 struct {USItype __l, __h;} __i; \
|
|
530 } __xx; \
|
|
531 __asm__ ("emul %2,%1,%0" \
|
|
532 : "=d" (__xx.__ll) \
|
|
533 : "%dI" ((USItype) (u)), \
|
|
534 "dI" ((USItype) (v))); \
|
|
535 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
|
|
536 #define __umulsidi3(u, v) \
|
|
537 ({UDItype __w; \
|
|
538 __asm__ ("emul %2,%1,%0" \
|
|
539 : "=d" (__w) \
|
|
540 : "%dI" ((USItype) (u)), \
|
|
541 "dI" ((USItype) (v))); \
|
|
542 __w; })
|
|
543 #endif /* __i960__ */
|
|
544
|
|
545 #if defined (__ia64) && W_TYPE_SIZE == 64
|
|
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction,
   (sh,sl) = (ah,al) - (bh,bl), with the final borrow discarded.

   The borrow is deliberately handled with an if/else instead of
   subtracting the value of "al < bl": this form encourages gcc
   (pre-release 3.4 at least) to emit predicated "sub r=r,r" and
   "sub r=r,r,1", giving a 2 cycle latency, whereas the arithmetic form
   materializes an actual 0 or 1 in a register, which takes an extra
   cycle.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                          \
    UWtype __lo_diff = (al) - (bl);             \
    if ((al) >= (bl))                           \
      (sh) = (ah) - (bh);                       \
    else                                        \
      (sh) = (ah) - (bh) - 1;                   \
    (sl) = __lo_diff;                           \
  } while (0)
|
|
560
|
|
561 /* Do both product parts in assembly, since that gives better code with
|
|
562 all gcc versions. Some callers will just use the upper part, and in
|
|
563 that situation we waste an instruction, but not any cycles. */
|
|
564 #define umul_ppmm(ph, pl, m0, m1) \
|
|
565 __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \
|
|
566 : "=&f" (ph), "=f" (pl) \
|
|
567 : "f" (m0), "f" (m1))
|
|
568 #define count_leading_zeros(count, x) \
|
|
569 do { \
|
|
570 UWtype _x = (x), _y, _a, _c; \
|
|
571 __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \
|
|
572 __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \
|
|
573 _c = (_a - 1) << 3; \
|
|
574 _x >>= _c; \
|
|
575 if (_x >= 1 << 4) \
|
|
576 _x >>= 4, _c += 4; \
|
|
577 if (_x >= 1 << 2) \
|
|
578 _x >>= 2, _c += 2; \
|
|
579 _c += _x >> 1; \
|
|
580 (count) = W_TYPE_SIZE - 1 - _c; \
|
|
581 } while (0)
|
|
582 /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
|
|
583 based, and we don't need a special case for x==0 here */
|
|
584 #define count_trailing_zeros(count, x) \
|
|
585 do { \
|
|
586 UWtype __ctz_x = (x); \
|
|
587 __asm__ ("popcnt %0 = %1" \
|
|
588 : "=r" (count) \
|
|
589 : "r" ((__ctz_x-1) & ~__ctz_x)); \
|
|
590 } while (0)
|
|
591 #define UMUL_TIME 14
|
|
592 #endif
|
|
593
|
|
594 #if defined (__M32R__) && W_TYPE_SIZE == 32
|
|
595 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
596 /* The cmp clears the condition bit. */ \
|
|
597 __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3" \
|
|
598 : "=r" ((USItype) (sh)), \
|
|
599 "=&r" ((USItype) (sl)) \
|
|
600 : "0" ((USItype) (ah)), \
|
|
601 "r" ((USItype) (bh)), \
|
|
602 "1" ((USItype) (al)), \
|
|
603 "r" ((USItype) (bl)) \
|
|
604 : "cbit")
|
|
605 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
606 /* The cmp clears the condition bit. */ \
|
|
607 __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3" \
|
|
608 : "=r" ((USItype) (sh)), \
|
|
609 "=&r" ((USItype) (sl)) \
|
|
610 : "0" ((USItype) (ah)), \
|
|
611 "r" ((USItype) (bh)), \
|
|
612 "1" ((USItype) (al)), \
|
|
613 "r" ((USItype) (bl)) \
|
|
614 : "cbit")
|
|
615 #endif /* __M32R__ */
|
|
616
|
|
617 #if defined (__mc68000__) && W_TYPE_SIZE == 32
|
|
618 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
619 __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \
|
|
620 : "=d" ((USItype) (sh)), \
|
|
621 "=&d" ((USItype) (sl)) \
|
|
622 : "%0" ((USItype) (ah)), \
|
|
623 "d" ((USItype) (bh)), \
|
|
624 "%1" ((USItype) (al)), \
|
|
625 "g" ((USItype) (bl)))
|
|
626 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
627 __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \
|
|
628 : "=d" ((USItype) (sh)), \
|
|
629 "=&d" ((USItype) (sl)) \
|
|
630 : "0" ((USItype) (ah)), \
|
|
631 "d" ((USItype) (bh)), \
|
|
632 "1" ((USItype) (al)), \
|
|
633 "g" ((USItype) (bl)))
|
|
634
|
|
635 /* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r. */
|
|
636 #if (defined (__mc68020__) && !defined (__mc68060__))
|
|
637 #define umul_ppmm(w1, w0, u, v) \
|
|
638 __asm__ ("mulu%.l %3,%1:%0" \
|
|
639 : "=d" ((USItype) (w0)), \
|
|
640 "=d" ((USItype) (w1)) \
|
|
641 : "%0" ((USItype) (u)), \
|
|
642 "dmi" ((USItype) (v)))
|
|
643 #define UMUL_TIME 45
|
|
644 #define udiv_qrnnd(q, r, n1, n0, d) \
|
|
645 __asm__ ("divu%.l %4,%1:%0" \
|
|
646 : "=d" ((USItype) (q)), \
|
|
647 "=d" ((USItype) (r)) \
|
|
648 : "0" ((USItype) (n0)), \
|
|
649 "1" ((USItype) (n1)), \
|
|
650 "dmi" ((USItype) (d)))
|
|
651 #define UDIV_TIME 90
|
|
652 #define sdiv_qrnnd(q, r, n1, n0, d) \
|
|
653 __asm__ ("divs%.l %4,%1:%0" \
|
|
654 : "=d" ((USItype) (q)), \
|
|
655 "=d" ((USItype) (r)) \
|
|
656 : "0" ((USItype) (n0)), \
|
|
657 "1" ((USItype) (n1)), \
|
|
658 "dmi" ((USItype) (d)))
|
|
659
|
|
660 #elif defined (__mcoldfire__) /* not mc68020 */
|
|
661
|
|
662 #define umul_ppmm(xh, xl, a, b) \
|
|
663 __asm__ ("| Inlined umul_ppmm\n" \
|
|
664 " move%.l %2,%/d0\n" \
|
|
665 " move%.l %3,%/d1\n" \
|
|
666 " move%.l %/d0,%/d2\n" \
|
|
667 " swap %/d0\n" \
|
|
668 " move%.l %/d1,%/d3\n" \
|
|
669 " swap %/d1\n" \
|
|
670 " move%.w %/d2,%/d4\n" \
|
|
671 " mulu %/d3,%/d4\n" \
|
|
672 " mulu %/d1,%/d2\n" \
|
|
673 " mulu %/d0,%/d3\n" \
|
|
674 " mulu %/d0,%/d1\n" \
|
|
675 " move%.l %/d4,%/d0\n" \
|
|
676 " clr%.w %/d0\n" \
|
|
677 " swap %/d0\n" \
|
|
678 " add%.l %/d0,%/d2\n" \
|
|
679 " add%.l %/d3,%/d2\n" \
|
|
680 " jcc 1f\n" \
|
|
681 " add%.l %#65536,%/d1\n" \
|
|
682 "1: swap %/d2\n" \
|
|
683 " moveq %#0,%/d0\n" \
|
|
684 " move%.w %/d2,%/d0\n" \
|
|
685 " move%.w %/d4,%/d2\n" \
|
|
686 " move%.l %/d2,%1\n" \
|
|
687 " add%.l %/d1,%/d0\n" \
|
|
688 " move%.l %/d0,%0" \
|
|
689 : "=g" ((USItype) (xh)), \
|
|
690 "=g" ((USItype) (xl)) \
|
|
691 : "g" ((USItype) (a)), \
|
|
692 "g" ((USItype) (b)) \
|
|
693 : "d0", "d1", "d2", "d3", "d4")
|
|
694 #define UMUL_TIME 100
|
|
695 #define UDIV_TIME 400
|
|
696 #else /* not ColdFire */
|
|
697 /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX. */
|
|
698 #define umul_ppmm(xh, xl, a, b) \
|
|
699 __asm__ ("| Inlined umul_ppmm\n" \
|
|
700 " move%.l %2,%/d0\n" \
|
|
701 " move%.l %3,%/d1\n" \
|
|
702 " move%.l %/d0,%/d2\n" \
|
|
703 " swap %/d0\n" \
|
|
704 " move%.l %/d1,%/d3\n" \
|
|
705 " swap %/d1\n" \
|
|
706 " move%.w %/d2,%/d4\n" \
|
|
707 " mulu %/d3,%/d4\n" \
|
|
708 " mulu %/d1,%/d2\n" \
|
|
709 " mulu %/d0,%/d3\n" \
|
|
710 " mulu %/d0,%/d1\n" \
|
|
711 " move%.l %/d4,%/d0\n" \
|
|
712 " eor%.w %/d0,%/d0\n" \
|
|
713 " swap %/d0\n" \
|
|
714 " add%.l %/d0,%/d2\n" \
|
|
715 " add%.l %/d3,%/d2\n" \
|
|
716 " jcc 1f\n" \
|
|
717 " add%.l %#65536,%/d1\n" \
|
|
718 "1: swap %/d2\n" \
|
|
719 " moveq %#0,%/d0\n" \
|
|
720 " move%.w %/d2,%/d0\n" \
|
|
721 " move%.w %/d4,%/d2\n" \
|
|
722 " move%.l %/d2,%1\n" \
|
|
723 " add%.l %/d1,%/d0\n" \
|
|
724 " move%.l %/d0,%0" \
|
|
725 : "=g" ((USItype) (xh)), \
|
|
726 "=g" ((USItype) (xl)) \
|
|
727 : "g" ((USItype) (a)), \
|
|
728 "g" ((USItype) (b)) \
|
|
729 : "d0", "d1", "d2", "d3", "d4")
|
|
730 #define UMUL_TIME 100
|
|
731 #define UDIV_TIME 400
|
|
732
|
|
733 #endif /* not mc68020 */
|
|
734
|
|
735 /* The '020, '030, '040 and '060 have bitfield insns.
|
|
736 cpu32 disguises as a 68020, but lacks them. */
|
|
737 #if defined (__mc68020__) && !defined (__mcpu32__)
|
|
738 #define count_leading_zeros(count, x) \
|
|
739 __asm__ ("bfffo %1{%b2:%b2},%0" \
|
|
740 : "=d" ((USItype) (count)) \
|
|
741 : "od" ((USItype) (x)), "n" (0))
|
|
742 /* Some ColdFire architectures have a ff1 instruction supported via
|
|
743 __builtin_clz. */
|
|
744 #elif defined (__mcfisaaplus__) || defined (__mcfisac__)
|
|
745 #define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
|
|
746 #define COUNT_LEADING_ZEROS_0 32
|
|
747 #endif
|
|
748 #endif /* mc68000 */
|
|
749
|
|
750 #if defined (__m88000__) && W_TYPE_SIZE == 32
|
|
751 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
752 __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
|
|
753 : "=r" ((USItype) (sh)), \
|
|
754 "=&r" ((USItype) (sl)) \
|
|
755 : "%rJ" ((USItype) (ah)), \
|
|
756 "rJ" ((USItype) (bh)), \
|
|
757 "%rJ" ((USItype) (al)), \
|
|
758 "rJ" ((USItype) (bl)))
|
|
759 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
760 __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
|
|
761 : "=r" ((USItype) (sh)), \
|
|
762 "=&r" ((USItype) (sl)) \
|
|
763 : "rJ" ((USItype) (ah)), \
|
|
764 "rJ" ((USItype) (bh)), \
|
|
765 "rJ" ((USItype) (al)), \
|
|
766 "rJ" ((USItype) (bl)))
|
|
767 #define count_leading_zeros(count, x) \
|
|
768 do { \
|
|
769 USItype __cbtmp; \
|
|
770 __asm__ ("ff1 %0,%1" \
|
|
771 : "=r" (__cbtmp) \
|
|
772 : "r" ((USItype) (x))); \
|
|
773 (count) = __cbtmp ^ 31; \
|
|
774 } while (0)
|
|
775 #define COUNT_LEADING_ZEROS_0 63 /* sic */
|
|
776 #if defined (__mc88110__)
|
|
777 #define umul_ppmm(wh, wl, u, v) \
|
|
778 do { \
|
|
779 union {UDItype __ll; \
|
|
780 struct {USItype __h, __l;} __i; \
|
|
781 } __xx; \
|
|
782 __asm__ ("mulu.d %0,%1,%2" \
|
|
783 : "=r" (__xx.__ll) \
|
|
784 : "r" ((USItype) (u)), \
|
|
785 "r" ((USItype) (v))); \
|
|
786 (wh) = __xx.__i.__h; \
|
|
787 (wl) = __xx.__i.__l; \
|
|
788 } while (0)
|
|
789 #define udiv_qrnnd(q, r, n1, n0, d) \
|
|
790 ({union {UDItype __ll; \
|
|
791 struct {USItype __h, __l;} __i; \
|
|
792 } __xx; \
|
|
793 USItype __q; \
|
|
794 __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
|
|
795 __asm__ ("divu.d %0,%1,%2" \
|
|
796 : "=r" (__q) \
|
|
797 : "r" (__xx.__ll), \
|
|
798 "r" ((USItype) (d))); \
|
|
799 (r) = (n0) - __q * (d); (q) = __q; })
|
|
800 #define UMUL_TIME 5
|
|
801 #define UDIV_TIME 25
|
|
802 #else
|
|
803 #define UMUL_TIME 17
|
|
804 #define UDIV_TIME 150
|
|
805 #endif /* __mc88110__ */
|
|
806 #endif /* __m88000__ */
|
|
807
|
|
#if defined (__mn10300__)
/* All asm statements in this section use the __asm__ spelling, like the
   rest of this header, so that the macros still compile when the
   including file is built with -ansi or a strict -std= option, where
   the plain `asm' keyword is not available.  */
# if defined (__AM33__)
#  define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
/* Unsigned and signed widening multiply: W0 and W1 are the two output
   operands of the four-operand mulu/mul instruction.  */
#  define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
#  define smul_ppmm(w1, w0, u, v) \
  __asm__ ("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
# else
/* Without AM33 the two-operand multiply is used; U is tied to the W0
   register and W1 is taken from the "z" constraint register.  */
#  define umul_ppmm(w1, w0, u, v) \
  __asm__ ("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
#  define smul_ppmm(w1, w0, u, v) \
  __asm__ ("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
# endif
/* Two-word add and subtract done in C: both operands are assembled in a
   DWunion, combined through its `ll' member and split again.  DWunion
   is not declared in the visible part of this header and must be
   supplied by the includer.  */
# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {                                                          \
    DWunion __s, __a, __b;                                      \
    __a.s.low = (al); __a.s.high = (ah);                        \
    __b.s.low = (bl); __b.s.high = (bh);                        \
    __s.ll = __a.ll + __b.ll;                                   \
    (sl) = __s.s.low; (sh) = __s.s.high;                        \
  } while (0)
# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                          \
    DWunion __s, __a, __b;                                      \
    __a.s.low = (al); __a.s.high = (ah);                        \
    __b.s.low = (bl); __b.s.high = (bh);                        \
    __s.ll = __a.ll - __b.ll;                                   \
    (sl) = __s.s.low; (sh) = __s.s.high;                        \
  } while (0)
/* Double-word by single-word division: NL is tied to the quotient
   operand and NH to the remainder operand.  */
# define udiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define sdiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define UMUL_TIME 3
# define UDIV_TIME 38
#endif
|
|
844
|
|
845 #if defined (__mips__) && W_TYPE_SIZE == 32
|
|
/* Unsigned 32x32 -> 64 multiply: (w1, w0) = u * v.  The product is formed
   in a 64-bit temporary and then split into its high and low words.  */
#define umul_ppmm(w1, w0, u, v) \
  do {									\
    USItype __mul_a = (USItype) (u);					\
    USItype __mul_b = (USItype) (v);					\
    UDItype __mul_prod = (UDItype) __mul_a * __mul_b;			\
    (w1) = (USItype) (__mul_prod >> 32);				\
    (w0) = (USItype) __mul_prod;					\
  } while (0)
|
|
852 #define UMUL_TIME 10
|
|
853 #define UDIV_TIME 100
|
|
854
|
|
855 #if (__mips == 32 || __mips == 64) && ! defined (__mips16)
|
|
856 #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
|
|
857 #define COUNT_LEADING_ZEROS_0 32
|
|
858 #endif
|
|
859 #endif /* __mips__ */
|
|
860
|
|
861 /* FIXME: We should test _IBMR2 here when we add assembly support for the
|
|
862 system vendor compilers.
|
|
863 FIXME: What's needed for gcc PowerPC VxWorks? __vxworks__ is not good
|
|
864 enough, since that hits ARM and m68k too. */
|
|
865 #if (defined (_ARCH_PPC) /* AIX */ \
|
|
866 || defined (__powerpc__) /* gcc */ \
|
|
867 || defined (__POWERPC__) /* BEOS */ \
|
|
868 || defined (__ppc__) /* Darwin */ \
|
|
869 || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
|
|
870 || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
|
|
871 && CPU_FAMILY == PPC) \
|
|
872 ) && W_TYPE_SIZE == 32
|
|
873 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
874 do { \
|
|
875 if (__builtin_constant_p (bh) && (bh) == 0) \
|
|
876 __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
|
|
877 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
|
|
878 else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
|
|
879 __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
|
|
880 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
|
|
881 else \
|
|
882 __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
|
|
883 : "=r" (sh), "=&r" (sl) \
|
|
884 : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
|
|
885 } while (0)
|
|
886 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
887 do { \
|
|
888 if (__builtin_constant_p (ah) && (ah) == 0) \
|
|
889 __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
|
|
890 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
|
|
891 else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
|
|
892 __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
|
|
893 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
|
|
894 else if (__builtin_constant_p (bh) && (bh) == 0) \
|
|
895 __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
|
|
896 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
|
|
897 else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
|
|
898 __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
|
|
899 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
|
|
900 else \
|
|
901 __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
|
|
902 : "=r" (sh), "=&r" (sl) \
|
|
903 : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
|
|
904 } while (0)
|
|
905 #define count_leading_zeros(count, x) \
|
|
906 __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
|
|
907 #define COUNT_LEADING_ZEROS_0 32
|
|
908 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
|
|
909 || defined (__ppc__) \
|
|
910 || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
|
|
911 || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
|
|
912 && CPU_FAMILY == PPC)
|
|
/* Unsigned 32x32 -> 64 multiply: (ph, pl) = m0 * m1.
   The high word is produced by the mulhwu instruction, the low word by an
   ordinary C multiplication.  Both use the local copies __m0/__m1, so each
   macro argument is evaluated exactly once (safe with side effects) and is
   converted to USItype before being handed to the asm.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    USItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
    (pl) = __m0 * __m1;							\
  } while (0)
|
|
919 #define UMUL_TIME 15
|
|
/* Signed 32x32 -> 64 multiply: (ph, pl) = m0 * m1.
   The high word is produced by the mulhw instruction.  The low word is
   computed with an unsigned multiplication: its bits are the same as those
   of the signed product, but unsigned arithmetic wraps instead of invoking
   undefined behaviour on overflow.  The asm uses the local copies
   __m0/__m1 so each macro argument is evaluated exactly once.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {									\
    SItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
    (pl) = (USItype) __m0 * (USItype) __m1;				\
  } while (0)
|
|
926 #define SMUL_TIME 14
|
|
927 #define UDIV_TIME 120
|
|
928 #endif
|
|
929 #endif /* 32-bit POWER architecture variants. */
|
|
930
|
|
931 /* We should test _IBMR2 here when we add assembly support for the system
|
|
932 vendor compilers. */
|
|
933 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
|
|
934 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
935 do { \
|
|
936 if (__builtin_constant_p (bh) && (bh) == 0) \
|
|
937 __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
|
|
938 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
|
|
939 else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
|
|
940 __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
|
|
941 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
|
|
942 else \
|
|
943 __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
|
|
944 : "=r" (sh), "=&r" (sl) \
|
|
945 : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
|
|
946 } while (0)
|
|
947 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
948 do { \
|
|
949 if (__builtin_constant_p (ah) && (ah) == 0) \
|
|
950 __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
|
|
951 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
|
|
952 else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
|
|
953 __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
|
|
954 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
|
|
955 else if (__builtin_constant_p (bh) && (bh) == 0) \
|
|
956 __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
|
|
957 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
|
|
958 else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
|
|
959 __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
|
|
960 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
|
|
961 else \
|
|
962 __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
|
|
963 : "=r" (sh), "=&r" (sl) \
|
|
964 : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
|
|
965 } while (0)
|
|
966 #define count_leading_zeros(count, x) \
|
|
967 __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
|
|
968 #define COUNT_LEADING_ZEROS_0 64
|
|
/* Unsigned 64x64 -> 128 multiply: (ph, pl) = m0 * m1.
   The high doubleword is produced by the mulhdu instruction, the low
   doubleword by an ordinary C multiplication.  Both use the local copies
   __m0/__m1, so each macro argument is evaluated exactly once (safe with
   side effects) and is converted to UDItype before being handed to the
   asm.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    UDItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
    (pl) = __m0 * __m1;							\
  } while (0)
|
|
975 #define UMUL_TIME 15
|
|
/* Signed 64x64 -> 128 multiply: (ph, pl) = m0 * m1.
   The high doubleword is produced by the mulhd instruction.  The low
   doubleword is computed with an unsigned multiplication: its bits are the
   same as those of the signed product, but unsigned arithmetic wraps
   instead of invoking undefined behaviour on overflow.  The asm uses the
   local copies __m0/__m1 so each macro argument is evaluated exactly
   once.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {									\
    DItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
    (pl) = (UDItype) __m0 * (UDItype) __m1;				\
  } while (0)
|
|
982 #define SMUL_TIME 14 /* ??? */
|
|
983 #define UDIV_TIME 120 /* ??? */
|
|
984 #endif /* 64-bit PowerPC. */
|
|
985
|
|
986 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
|
|
987 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
988 __asm__ ("a %1,%5\n\tae %0,%3" \
|
|
989 : "=r" ((USItype) (sh)), \
|
|
990 "=&r" ((USItype) (sl)) \
|
|
991 : "%0" ((USItype) (ah)), \
|
|
992 "r" ((USItype) (bh)), \
|
|
993 "%1" ((USItype) (al)), \
|
|
994 "r" ((USItype) (bl)))
|
|
995 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
996 __asm__ ("s %1,%5\n\tse %0,%3" \
|
|
997 : "=r" ((USItype) (sh)), \
|
|
998 "=&r" ((USItype) (sl)) \
|
|
999 : "0" ((USItype) (ah)), \
|
|
1000 "r" ((USItype) (bh)), \
|
|
1001 "1" ((USItype) (al)), \
|
|
1002 "r" ((USItype) (bl)))
|
|
1003 #define umul_ppmm(ph, pl, m0, m1) \
|
|
1004 do { \
|
|
1005 USItype __m0 = (m0), __m1 = (m1); \
|
|
1006 __asm__ ( \
|
|
1007 "s r2,r2\n" \
|
|
1008 " mts r10,%2\n" \
|
|
1009 " m r2,%3\n" \
|
|
1010 " m r2,%3\n" \
|
|
1011 " m r2,%3\n" \
|
|
1012 " m r2,%3\n" \
|
|
1013 " m r2,%3\n" \
|
|
1014 " m r2,%3\n" \
|
|
1015 " m r2,%3\n" \
|
|
1016 " m r2,%3\n" \
|
|
1017 " m r2,%3\n" \
|
|
1018 " m r2,%3\n" \
|
|
1019 " m r2,%3\n" \
|
|
1020 " m r2,%3\n" \
|
|
1021 " m r2,%3\n" \
|
|
1022 " m r2,%3\n" \
|
|
1023 " m r2,%3\n" \
|
|
1024 " m r2,%3\n" \
|
|
1025 " cas %0,r2,r0\n" \
|
|
1026 " mfs r10,%1" \
|
|
1027 : "=r" ((USItype) (ph)), \
|
|
1028 "=r" ((USItype) (pl)) \
|
|
1029 : "%r" (__m0), \
|
|
1030 "r" (__m1) \
|
|
1031 : "r2"); \
|
|
1032 (ph) += ((((SItype) __m0 >> 31) & __m1) \
|
|
1033 + (((SItype) __m1 >> 31) & __m0)); \
|
|
1034 } while (0)
|
|
1035 #define UMUL_TIME 20
|
|
1036 #define UDIV_TIME 200
|
|
1037 #define count_leading_zeros(count, x) \
|
|
1038 do { \
|
|
1039 if ((x) >= 0x10000) \
|
|
1040 __asm__ ("clz %0,%1" \
|
|
1041 : "=r" ((USItype) (count)) \
|
|
1042 : "r" ((USItype) (x) >> 16)); \
|
|
1043 else \
|
|
1044 { \
|
|
1045 __asm__ ("clz %0,%1" \
|
|
1046 : "=r" ((USItype) (count)) \
|
|
1047 : "r" ((USItype) (x))); \
|
|
1048 (count) += 16; \
|
|
1049 } \
|
|
1050 } while (0)
|
|
1051 #endif
|
|
1052
|
131
|
1053 #if defined(__riscv)
|
|
1054 #ifdef __riscv_mul
|
|
1055 #define __umulsidi3(u,v) ((UDWtype)(UWtype)(u) * (UWtype)(v))
|
|
1056 #define __muluw3(a, b) ((UWtype)(a) * (UWtype)(b))
|
|
1057 #else
|
|
1058 #if __riscv_xlen == 32
|
|
1059 #define MULUW3 "call __mulsi3"
|
|
1060 #elif __riscv_xlen == 64
|
|
1061 #define MULUW3 "call __muldi3"
|
|
1062 #else
|
|
1063 #error unsupport xlen
|
|
1064 #endif /* __riscv_xlen */
|
|
1065 /* We rely on the fact that MULUW3 doesn't clobber the t-registers.
|
|
1066 It can get better register allocation result. */
|
|
1067 #define __muluw3(a, b) \
|
|
1068 ({ \
|
|
1069 register UWtype __op0 asm ("a0") = a; \
|
|
1070 register UWtype __op1 asm ("a1") = b; \
|
|
1071 asm volatile (MULUW3 \
|
|
1072 : "+r" (__op0), "+r" (__op1) \
|
|
1073 : \
|
|
1074 : "ra", "a2", "a3"); \
|
|
1075 __op0; \
|
|
1076 })
|
|
1077 #endif /* __riscv_mul */
|
|
/* Full-width unsigned multiply: (w1, w0) = u * v.  The operands are split
   into half-words, the four partial products are formed with __muluw3 and
   then recombined, propagating the carry out of the middle terms.  */
#define umul_ppmm(w1, w0, u, v) \
  do {									\
    /* Half-word pieces of both operands.  */				\
    UHWtype __u_lo = __ll_lowpart (u);					\
    UHWtype __u_hi = __ll_highpart (u);					\
    UHWtype __v_lo = __ll_lowpart (v);					\
    UHWtype __v_hi = __ll_highpart (v);					\
									\
    /* The four partial products.  */					\
    UWtype __p_ll = __muluw3 (__u_lo, __v_lo);				\
    UWtype __p_lh = __muluw3 (__u_lo, __v_hi);				\
    UWtype __p_hl = __muluw3 (__u_hi, __v_lo);				\
    UWtype __p_hh = __muluw3 (__u_hi, __v_hi);				\
									\
    /* Adding the high half of the low product cannot carry ...  */	\
    UWtype __mid = __p_lh + __ll_highpart (__p_ll);			\
    /* ... but adding the second cross product can.  */		\
    __mid += __p_hl;							\
    if (__mid < __p_hl)							\
      __p_hh += __ll_B;	/* carry goes into the high product */	\
									\
    (w1) = __p_hh + __ll_highpart (__mid);				\
    (w0) = __ll_lowpart (__mid) * __ll_B + __ll_lowpart (__p_ll);	\
  } while (0)
|
|
1101 #endif /* __riscv */
|
|
1102
|
111
|
1103 #if defined(__sh__) && W_TYPE_SIZE == 32
|
|
1104 #ifndef __sh1__
|
|
1105 #define umul_ppmm(w1, w0, u, v) \
|
|
1106 __asm__ ( \
|
|
1107 "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0 mach,%0" \
|
|
1108 : "=r<" ((USItype)(w1)), \
|
|
1109 "=r<" ((USItype)(w0)) \
|
|
1110 : "r" ((USItype)(u)), \
|
|
1111 "r" ((USItype)(v)) \
|
|
1112 : "macl", "mach")
|
|
1113 #define UMUL_TIME 5
|
|
1114 #endif
|
|
1115
|
|
1116 /* This is the same algorithm as __udiv_qrnnd_c. */
|
|
1117 #define UDIV_NEEDS_NORMALIZATION 1
|
|
1118
|
|
1119 #ifdef __FDPIC__
|
|
1120 /* FDPIC needs a special version of the asm fragment to extract the
|
|
1121 code address from the function descriptor. __udiv_qrnnd_16 is
|
|
1122 assumed to be local and not to use the GOT, so loading r12 is
|
|
1123 not needed. */
|
|
1124 #define udiv_qrnnd(q, r, n1, n0, d) \
|
|
1125 do { \
|
|
1126 extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
|
|
1127 __attribute__ ((visibility ("hidden"))); \
|
|
1128 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
|
|
1129 __asm__ ( \
|
|
1130 "mov%M4 %4,r5\n" \
|
|
1131 " swap.w %3,r4\n" \
|
|
1132 " swap.w r5,r6\n" \
|
|
1133 " mov.l @%5,r2\n" \
|
|
1134 " jsr @r2\n" \
|
|
1135 " shll16 r6\n" \
|
|
1136 " swap.w r4,r4\n" \
|
|
1137 " mov.l @%5,r2\n" \
|
|
1138 " jsr @r2\n" \
|
|
1139 " swap.w r1,%0\n" \
|
|
1140 " or r1,%0" \
|
|
1141 : "=r" (q), "=&z" (r) \
|
|
1142 : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
|
|
1143 : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
|
|
1144 } while (0)
|
|
1145 #else
|
|
1146 #define udiv_qrnnd(q, r, n1, n0, d) \
|
|
1147 do { \
|
|
1148 extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
|
|
1149 __attribute__ ((visibility ("hidden"))); \
|
|
1150 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
|
|
1151 __asm__ ( \
|
|
1152 "mov%M4 %4,r5\n" \
|
|
1153 " swap.w %3,r4\n" \
|
|
1154 " swap.w r5,r6\n" \
|
|
1155 " jsr @%5\n" \
|
|
1156 " shll16 r6\n" \
|
|
1157 " swap.w r4,r4\n" \
|
|
1158 " jsr @%5\n" \
|
|
1159 " swap.w r1,%0\n" \
|
|
1160 " or r1,%0" \
|
|
1161 : "=r" (q), "=&z" (r) \
|
|
1162 : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
|
|
1163 : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
|
|
1164 } while (0)
|
|
1165 #endif /* __FDPIC__ */
|
|
1166
|
|
1167 #define UDIV_TIME 80
|
|
1168
|
|
1169 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
1170 __asm__ ("clrt;subc %5,%1; subc %4,%0" \
|
|
1171 : "=r" (sh), "=r" (sl) \
|
|
1172 : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
|
|
1173
|
|
1174 #endif /* __sh__ */
|
|
1175
|
|
1176 #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
|
|
1177 && W_TYPE_SIZE == 32
|
|
1178 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
1179 __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \
|
|
1180 : "=r" ((USItype) (sh)), \
|
|
1181 "=&r" ((USItype) (sl)) \
|
|
1182 : "%rJ" ((USItype) (ah)), \
|
|
1183 "rI" ((USItype) (bh)), \
|
|
1184 "%rJ" ((USItype) (al)), \
|
|
1185 "rI" ((USItype) (bl)) \
|
|
1186 __CLOBBER_CC)
|
|
1187 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
1188 __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \
|
|
1189 : "=r" ((USItype) (sh)), \
|
|
1190 "=&r" ((USItype) (sl)) \
|
|
1191 : "rJ" ((USItype) (ah)), \
|
|
1192 "rI" ((USItype) (bh)), \
|
|
1193 "rJ" ((USItype) (al)), \
|
|
1194 "rI" ((USItype) (bl)) \
|
|
1195 __CLOBBER_CC)
|
|
1196 #if defined (__sparc_v9__)
|
|
1197 #define umul_ppmm(w1, w0, u, v) \
|
|
1198 do { \
|
|
1199 register USItype __g1 asm ("g1"); \
|
|
1200 __asm__ ("umul\t%2,%3,%1\n\t" \
|
|
1201 "srlx\t%1, 32, %0" \
|
|
1202 : "=r" ((USItype) (w1)), \
|
|
1203 "=r" (__g1) \
|
|
1204 : "r" ((USItype) (u)), \
|
|
1205 "r" ((USItype) (v))); \
|
|
1206 (w0) = __g1; \
|
|
1207 } while (0)
|
|
1208 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
|
|
1209 __asm__ ("mov\t%2,%%y\n\t" \
|
|
1210 "udiv\t%3,%4,%0\n\t" \
|
|
1211 "umul\t%0,%4,%1\n\t" \
|
|
1212 "sub\t%3,%1,%1" \
|
|
1213 : "=&r" ((USItype) (__q)), \
|
|
1214 "=&r" ((USItype) (__r)) \
|
|
1215 : "r" ((USItype) (__n1)), \
|
|
1216 "r" ((USItype) (__n0)), \
|
|
1217 "r" ((USItype) (__d)))
|
|
1218 #else
|
|
1219 #if defined (__sparc_v8__)
|
|
1220 #define umul_ppmm(w1, w0, u, v) \
|
|
1221 __asm__ ("umul %2,%3,%1;rd %%y,%0" \
|
|
1222 : "=r" ((USItype) (w1)), \
|
|
1223 "=r" ((USItype) (w0)) \
|
|
1224 : "r" ((USItype) (u)), \
|
|
1225 "r" ((USItype) (v)))
|
|
1226 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
|
|
1227 __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
|
|
1228 : "=&r" ((USItype) (__q)), \
|
|
1229 "=&r" ((USItype) (__r)) \
|
|
1230 : "r" ((USItype) (__n1)), \
|
|
1231 "r" ((USItype) (__n0)), \
|
|
1232 "r" ((USItype) (__d)))
|
|
1233 #else
|
|
1234 #if defined (__sparclite__)
|
|
1235 /* This has hardware multiply but not divide. It also has two additional
|
|
1236 instructions scan (ffs from high bit) and divscc. */
|
|
1237 #define umul_ppmm(w1, w0, u, v) \
|
|
1238 __asm__ ("umul %2,%3,%1;rd %%y,%0" \
|
|
1239 : "=r" ((USItype) (w1)), \
|
|
1240 "=r" ((USItype) (w0)) \
|
|
1241 : "r" ((USItype) (u)), \
|
|
1242 "r" ((USItype) (v)))
|
|
1243 #define udiv_qrnnd(q, r, n1, n0, d) \
|
|
1244 __asm__ ("! Inlined udiv_qrnnd\n" \
|
|
1245 " wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
|
|
1246 " tst %%g0\n" \
|
|
1247 " divscc %3,%4,%%g1\n" \
|
|
1248 " divscc %%g1,%4,%%g1\n" \
|
|
1249 " divscc %%g1,%4,%%g1\n" \
|
|
1250 " divscc %%g1,%4,%%g1\n" \
|
|
1251 " divscc %%g1,%4,%%g1\n" \
|
|
1252 " divscc %%g1,%4,%%g1\n" \
|
|
1253 " divscc %%g1,%4,%%g1\n" \
|
|
1254 " divscc %%g1,%4,%%g1\n" \
|
|
1255 " divscc %%g1,%4,%%g1\n" \
|
|
1256 " divscc %%g1,%4,%%g1\n" \
|
|
1257 " divscc %%g1,%4,%%g1\n" \
|
|
1258 " divscc %%g1,%4,%%g1\n" \
|
|
1259 " divscc %%g1,%4,%%g1\n" \
|
|
1260 " divscc %%g1,%4,%%g1\n" \
|
|
1261 " divscc %%g1,%4,%%g1\n" \
|
|
1262 " divscc %%g1,%4,%%g1\n" \
|
|
1263 " divscc %%g1,%4,%%g1\n" \
|
|
1264 " divscc %%g1,%4,%%g1\n" \
|
|
1265 " divscc %%g1,%4,%%g1\n" \
|
|
1266 " divscc %%g1,%4,%%g1\n" \
|
|
1267 " divscc %%g1,%4,%%g1\n" \
|
|
1268 " divscc %%g1,%4,%%g1\n" \
|
|
1269 " divscc %%g1,%4,%%g1\n" \
|
|
1270 " divscc %%g1,%4,%%g1\n" \
|
|
1271 " divscc %%g1,%4,%%g1\n" \
|
|
1272 " divscc %%g1,%4,%%g1\n" \
|
|
1273 " divscc %%g1,%4,%%g1\n" \
|
|
1274 " divscc %%g1,%4,%%g1\n" \
|
|
1275 " divscc %%g1,%4,%%g1\n" \
|
|
1276 " divscc %%g1,%4,%%g1\n" \
|
|
1277 " divscc %%g1,%4,%%g1\n" \
|
|
1278 " divscc %%g1,%4,%0\n" \
|
|
1279 " rd %%y,%1\n" \
|
|
1280 " bl,a 1f\n" \
|
|
1281 " add %1,%4,%1\n" \
|
|
1282 "1: ! End of inline udiv_qrnnd" \
|
|
1283 : "=r" ((USItype) (q)), \
|
|
1284 "=r" ((USItype) (r)) \
|
|
1285 : "r" ((USItype) (n1)), \
|
|
1286 "r" ((USItype) (n0)), \
|
|
1287 "rI" ((USItype) (d)) \
|
|
1288 : "g1" __AND_CLOBBER_CC)
|
|
1289 #define UDIV_TIME 37
|
|
1290 #define count_leading_zeros(count, x) \
|
|
1291 do { \
|
|
1292 __asm__ ("scan %1,1,%0" \
|
|
1293 : "=r" ((USItype) (count)) \
|
|
1294 : "r" ((USItype) (x))); \
|
|
1295 } while (0)
|
|
1296 /* Early sparclites return 63 for an argument of 0, but they warn that future
|
|
1297 implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0
|
|
1298 undefined. */
|
|
1299 #else
|
|
1300 /* SPARC without integer multiplication and divide instructions.
|
|
1301 (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
|
|
1302 #define umul_ppmm(w1, w0, u, v) \
|
|
1303 __asm__ ("! Inlined umul_ppmm\n" \
|
|
1304 " wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n"\
|
|
1305 " sra %3,31,%%o5 ! Don't move this insn\n" \
|
|
1306 " and %2,%%o5,%%o5 ! Don't move this insn\n" \
|
|
1307 " andcc %%g0,0,%%g1 ! Don't move this insn\n" \
|
|
1308 " mulscc %%g1,%3,%%g1\n" \
|
|
1309 " mulscc %%g1,%3,%%g1\n" \
|
|
1310 " mulscc %%g1,%3,%%g1\n" \
|
|
1311 " mulscc %%g1,%3,%%g1\n" \
|
|
1312 " mulscc %%g1,%3,%%g1\n" \
|
|
1313 " mulscc %%g1,%3,%%g1\n" \
|
|
1314 " mulscc %%g1,%3,%%g1\n" \
|
|
1315 " mulscc %%g1,%3,%%g1\n" \
|
|
1316 " mulscc %%g1,%3,%%g1\n" \
|
|
1317 " mulscc %%g1,%3,%%g1\n" \
|
|
1318 " mulscc %%g1,%3,%%g1\n" \
|
|
1319 " mulscc %%g1,%3,%%g1\n" \
|
|
1320 " mulscc %%g1,%3,%%g1\n" \
|
|
1321 " mulscc %%g1,%3,%%g1\n" \
|
|
1322 " mulscc %%g1,%3,%%g1\n" \
|
|
1323 " mulscc %%g1,%3,%%g1\n" \
|
|
1324 " mulscc %%g1,%3,%%g1\n" \
|
|
1325 " mulscc %%g1,%3,%%g1\n" \
|
|
1326 " mulscc %%g1,%3,%%g1\n" \
|
|
1327 " mulscc %%g1,%3,%%g1\n" \
|
|
1328 " mulscc %%g1,%3,%%g1\n" \
|
|
1329 " mulscc %%g1,%3,%%g1\n" \
|
|
1330 " mulscc %%g1,%3,%%g1\n" \
|
|
1331 " mulscc %%g1,%3,%%g1\n" \
|
|
1332 " mulscc %%g1,%3,%%g1\n" \
|
|
1333 " mulscc %%g1,%3,%%g1\n" \
|
|
1334 " mulscc %%g1,%3,%%g1\n" \
|
|
1335 " mulscc %%g1,%3,%%g1\n" \
|
|
1336 " mulscc %%g1,%3,%%g1\n" \
|
|
1337 " mulscc %%g1,%3,%%g1\n" \
|
|
1338 " mulscc %%g1,%3,%%g1\n" \
|
|
1339 " mulscc %%g1,%3,%%g1\n" \
|
|
1340 " mulscc %%g1,0,%%g1\n" \
|
|
1341 " add %%g1,%%o5,%0\n" \
|
|
1342 " rd %%y,%1" \
|
|
1343 : "=r" ((USItype) (w1)), \
|
|
1344 "=r" ((USItype) (w0)) \
|
|
1345 : "%rI" ((USItype) (u)), \
|
|
1346 "r" ((USItype) (v)) \
|
|
1347 : "g1", "o5" __AND_CLOBBER_CC)
|
|
1348 #define UMUL_TIME 39 /* 39 instructions */
|
|
1349 /* It's quite necessary to add this much assembler for the sparc.
|
|
1350 The default udiv_qrnnd (in C) is more than 10 times slower! */
|
|
1351 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
|
|
1352 __asm__ ("! Inlined udiv_qrnnd\n" \
|
|
1353 " mov 32,%%g1\n" \
|
|
1354 " subcc %1,%2,%%g0\n" \
|
|
1355 "1: bcs 5f\n" \
|
|
1356 " addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
|
|
1357 " sub %1,%2,%1 ! this kills msb of n\n" \
|
|
1358 " addx %1,%1,%1 ! so this can't give carry\n" \
|
|
1359 " subcc %%g1,1,%%g1\n" \
|
|
1360 "2: bne 1b\n" \
|
|
1361 " subcc %1,%2,%%g0\n" \
|
|
1362 " bcs 3f\n" \
|
|
1363 " addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
|
|
1364 " b 3f\n" \
|
|
1365 " sub %1,%2,%1 ! this kills msb of n\n" \
|
|
1366 "4: sub %1,%2,%1\n" \
|
|
1367 "5: addxcc %1,%1,%1\n" \
|
|
1368 " bcc 2b\n" \
|
|
1369 " subcc %%g1,1,%%g1\n" \
|
|
1370 "! Got carry from n. Subtract next step to cancel this carry.\n" \
|
|
1371 " bne 4b\n" \
|
|
1372 " addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n" \
|
|
1373 " sub %1,%2,%1\n" \
|
|
1374 "3: xnor %0,0,%0\n" \
|
|
1375 " ! End of inline udiv_qrnnd" \
|
|
1376 : "=&r" ((USItype) (__q)), \
|
|
1377 "=&r" ((USItype) (__r)) \
|
|
1378 : "r" ((USItype) (__d)), \
|
|
1379 "1" ((USItype) (__n1)), \
|
|
1380 "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
|
|
1381 #define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */
|
|
1382 #endif /* __sparclite__ */
|
|
1383 #endif /* __sparc_v8__ */
|
|
1384 #endif /* __sparc_v9__ */
|
|
1385 #endif /* sparc32 */
|
|
1386
|
|
1387 #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
|
|
1388 && W_TYPE_SIZE == 64
|
|
1389 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
1390 do { \
|
|
1391 UDItype __carry = 0; \
|
|
1392 __asm__ ("addcc\t%r5,%6,%1\n\t" \
|
|
1393 "add\t%r3,%4,%0\n\t" \
|
|
1394 "movcs\t%%xcc, 1, %2\n\t" \
|
|
1395 "add\t%0, %2, %0" \
|
|
1396 : "=r" ((UDItype)(sh)), \
|
|
1397 "=&r" ((UDItype)(sl)), \
|
|
1398 "+r" (__carry) \
|
|
1399 : "%rJ" ((UDItype)(ah)), \
|
|
1400 "rI" ((UDItype)(bh)), \
|
|
1401 "%rJ" ((UDItype)(al)), \
|
|
1402 "rI" ((UDItype)(bl)) \
|
|
1403 __CLOBBER_CC); \
|
|
1404 } while (0)
|
|
1405
|
|
/* Double-word subtraction: (sh, sl) = (ah, al) - (bh, bl).
   The low words are subtracted with subcc and the high words with sub;
   movcs on %xcc then sets __carry to 1 when the low subtraction borrowed,
   and that borrow is subtracted from the high word.

   Subtraction is not commutative, so the minuend operands (ah, al) must
   NOT carry the '%' constraint modifier: with it the compiler would be
   allowed to exchange them with (bh, bl) to satisfy the rJ/rI constraints,
   yielding b - a.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {									\
    UDItype __carry = 0;						\
    __asm__ ("subcc\t%r5,%6,%1\n\t"					\
	     "sub\t%r3,%4,%0\n\t"					\
	     "movcs\t%%xcc, 1, %2\n\t"					\
	     "sub\t%0, %2, %0"						\
	     : "=r" ((UDItype)(sh)),					\
	       "=&r" ((UDItype)(sl)),					\
	       "+r" (__carry)						\
	     : "rJ" ((UDItype)(ah)),					\
	       "rI" ((UDItype)(bh)),					\
	       "rJ" ((UDItype)(al)),					\
	       "rI" ((UDItype)(bl))					\
	     __CLOBBER_CC);						\
  } while (0)
|
|
1422
|
|
1423 #define umul_ppmm(wh, wl, u, v) \
|
|
1424 do { \
|
|
1425 UDItype tmp1, tmp2, tmp3, tmp4; \
|
|
1426 __asm__ __volatile__ ( \
|
|
1427 "srl %7,0,%3\n\t" \
|
|
1428 "mulx %3,%6,%1\n\t" \
|
|
1429 "srlx %6,32,%2\n\t" \
|
|
1430 "mulx %2,%3,%4\n\t" \
|
|
1431 "sllx %4,32,%5\n\t" \
|
|
1432 "srl %6,0,%3\n\t" \
|
|
1433 "sub %1,%5,%5\n\t" \
|
|
1434 "srlx %5,32,%5\n\t" \
|
|
1435 "addcc %4,%5,%4\n\t" \
|
|
1436 "srlx %7,32,%5\n\t" \
|
|
1437 "mulx %3,%5,%3\n\t" \
|
|
1438 "mulx %2,%5,%5\n\t" \
|
|
1439 "sethi %%hi(0x80000000),%2\n\t" \
|
|
1440 "addcc %4,%3,%4\n\t" \
|
|
1441 "srlx %4,32,%4\n\t" \
|
|
1442 "add %2,%2,%2\n\t" \
|
|
1443 "movcc %%xcc,%%g0,%2\n\t" \
|
|
1444 "addcc %5,%4,%5\n\t" \
|
|
1445 "sllx %3,32,%3\n\t" \
|
|
1446 "add %1,%3,%1\n\t" \
|
|
1447 "add %5,%2,%0" \
|
|
1448 : "=r" ((UDItype)(wh)), \
|
|
1449 "=&r" ((UDItype)(wl)), \
|
|
1450 "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
|
|
1451 : "r" ((UDItype)(u)), \
|
|
1452 "r" ((UDItype)(v)) \
|
|
1453 __CLOBBER_CC); \
|
|
1454 } while (0)
|
|
1455 #define UMUL_TIME 96
|
|
1456 #define UDIV_TIME 230
|
|
1457 #endif /* sparc64 */
|
|
1458
|
|
1459 #if defined (__vax__) && W_TYPE_SIZE == 32
|
|
1460 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
1461 __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \
|
|
1462 : "=g" ((USItype) (sh)), \
|
|
1463 "=&g" ((USItype) (sl)) \
|
|
1464 : "%0" ((USItype) (ah)), \
|
|
1465 "g" ((USItype) (bh)), \
|
|
1466 "%1" ((USItype) (al)), \
|
|
1467 "g" ((USItype) (bl)))
|
|
1468 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
1469 __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \
|
|
1470 : "=g" ((USItype) (sh)), \
|
|
1471 "=&g" ((USItype) (sl)) \
|
|
1472 : "0" ((USItype) (ah)), \
|
|
1473 "g" ((USItype) (bh)), \
|
|
1474 "1" ((USItype) (al)), \
|
|
1475 "g" ((USItype) (bl)))
|
|
1476 #define umul_ppmm(xh, xl, m0, m1) \
|
|
1477 do { \
|
|
1478 union { \
|
|
1479 UDItype __ll; \
|
|
1480 struct {USItype __l, __h;} __i; \
|
|
1481 } __xx; \
|
|
1482 USItype __m0 = (m0), __m1 = (m1); \
|
|
1483 __asm__ ("emul %1,%2,$0,%0" \
|
|
1484 : "=r" (__xx.__ll) \
|
|
1485 : "g" (__m0), \
|
|
1486 "g" (__m1)); \
|
|
1487 (xh) = __xx.__i.__h; \
|
|
1488 (xl) = __xx.__i.__l; \
|
|
1489 (xh) += ((((SItype) __m0 >> 31) & __m1) \
|
|
1490 + (((SItype) __m1 >> 31) & __m0)); \
|
|
1491 } while (0)
|
|
1492 #define sdiv_qrnnd(q, r, n1, n0, d) \
|
|
1493 do { \
|
|
1494 union {DItype __ll; \
|
|
1495 struct {SItype __l, __h;} __i; \
|
|
1496 } __xx; \
|
|
1497 __xx.__i.__h = n1; __xx.__i.__l = n0; \
|
|
1498 __asm__ ("ediv %3,%2,%0,%1" \
|
|
1499 : "=g" (q), "=g" (r) \
|
|
1500 : "g" (__xx.__ll), "g" (d)); \
|
|
1501 } while (0)
|
|
1502 #endif /* __vax__ */
|
|
1503
|
|
1504 #ifdef _TMS320C6X
|
|
1505 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
1506 do \
|
|
1507 { \
|
|
1508 UDItype __ll; \
|
|
1509 __asm__ ("addu .l1 %1, %2, %0" \
|
|
1510 : "=a" (__ll) : "a" (al), "a" (bl)); \
|
|
1511 (sl) = (USItype)__ll; \
|
|
1512 (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh); \
|
|
1513 } \
|
|
1514 while (0)
|
|
1515
|
|
1516 #ifdef _TMS320C6400_PLUS
|
|
/* Unsigned 32x32 -> 64 multiply returning the full product.  Both arguments
   are parenthesised so that compound expressions such as `a + b' are
   converted and multiplied as a whole.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
|
|
/* Unsigned 32x32 -> 64 multiply: (w1, w0) = u * v, computed through a
   64-bit temporary that is then split into high and low words.  */
#define umul_ppmm(w1, w0, u, v) \
  do {									\
    UDItype __full_prod;						\
    __full_prod = (UDItype) (USItype) (u) * (USItype) (v);		\
    (w1) = (USItype) (__full_prod >> 32);	/* high word */		\
    (w0) = (USItype) (__full_prod);		/* low word */		\
  } while (0)
|
|
1524 #endif /* _TMS320C6400_PLUS */
|
|
1525
|
|
1526 #define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
|
|
1527 #ifdef _TMS320C6400
|
|
1528 #define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
|
|
1529 #endif
|
|
1530 #define UMUL_TIME 4
|
|
1531 #define UDIV_TIME 40
|
|
1532 #endif /* _TMS320C6X */
|
|
1533
|
|
1534 #if defined (__xtensa__) && W_TYPE_SIZE == 32
|
|
1535 /* This code is not Xtensa-configuration-specific, so rely on the compiler
|
|
1536 to expand builtin functions depending on what configuration features
|
|
1537 are available. This avoids library calls when the operation can be
|
|
1538 performed in-line. */
|
|
1539 #define umul_ppmm(w1, w0, u, v) \
|
|
1540 do { \
|
|
1541 DWunion __w; \
|
|
1542 __w.ll = __builtin_umulsidi3 (u, v); \
|
|
1543 w1 = __w.s.high; \
|
|
1544 w0 = __w.s.low; \
|
|
1545 } while (0)
|
|
1546 #define __umulsidi3(u, v) __builtin_umulsidi3 (u, v)
|
|
1547 #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
|
|
1548 #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
|
|
1549 #endif /* __xtensa__ */
|
|
1550
|
|
1551 #if defined xstormy16
|
|
1552 extern UHItype __stormy16_count_leading_zeros (UHItype);
|
|
/* Count the leading zero bits of X into COUNT by examining X sixteen bits
   at a time, from the most significant chunk downwards, with __clzhi2.
   The scan stops at the first chunk that is not entirely zero.

   The temporaries use reserved-style names (__clz_size, __clz_chunk) so
   that they cannot shadow a caller variable called `size' or `c' that is
   passed as COUNT or appears in X.  */
#define count_leading_zeros(count, x)					\
  do									\
    {									\
      UHItype __clz_size;						\
									\
      /* We assume that W_TYPE_SIZE is a multiple of 16...  */		\
      for ((count) = 0, __clz_size = W_TYPE_SIZE;			\
	   __clz_size;							\
	   __clz_size -= 16)						\
	{								\
	  UHItype __clz_chunk;						\
									\
	  __clz_chunk = __clzhi2 ((x) >> (__clz_size - 16));		\
	  (count) += __clz_chunk;					\
	  /* Fewer than 16 zeros: the top set bit was in this chunk.  */ \
	  if (__clz_chunk != 16)					\
	    break;							\
	}								\
    }									\
  while (0)
|
|
1570 #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
|
|
1571 #endif
|
|
1572
|
|
1573 #if defined (__z8000__) && W_TYPE_SIZE == 16
|
|
1574 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
1575 __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
|
|
1576 : "=r" ((unsigned int)(sh)), \
|
|
1577 "=&r" ((unsigned int)(sl)) \
|
|
1578 : "%0" ((unsigned int)(ah)), \
|
|
1579 "r" ((unsigned int)(bh)), \
|
|
1580 "%1" ((unsigned int)(al)), \
|
|
1581 "rQR" ((unsigned int)(bl)))
|
|
1582 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
1583 __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
|
|
1584 : "=r" ((unsigned int)(sh)), \
|
|
1585 "=&r" ((unsigned int)(sl)) \
|
|
1586 : "0" ((unsigned int)(ah)), \
|
|
1587 "r" ((unsigned int)(bh)), \
|
|
1588 "1" ((unsigned int)(al)), \
|
|
1589 "rQR" ((unsigned int)(bl)))
|
|
1590 #define umul_ppmm(xh, xl, m0, m1) \
|
|
1591 do { \
|
|
1592 union {long int __ll; \
|
|
1593 struct {unsigned int __h, __l;} __i; \
|
|
1594 } __xx; \
|
|
1595 unsigned int __m0 = (m0), __m1 = (m1); \
|
|
1596 __asm__ ("mult %S0,%H3" \
|
|
1597 : "=r" (__xx.__i.__h), \
|
|
1598 "=r" (__xx.__i.__l) \
|
|
1599 : "%1" (__m0), \
|
|
1600 "rQR" (__m1)); \
|
|
1601 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
|
|
1602 (xh) += ((((signed int) __m0 >> 15) & __m1) \
|
|
1603 + (((signed int) __m1 >> 15) & __m0)); \
|
|
1604 } while (0)
|
|
1605 #endif /* __z8000__ */
|
|
1606
|
|
1607 #endif /* __GNUC__ */
|
|
1608
|
|
1609 /* If this machine has no inline assembler, use C macros. */
|
|
1610
|
|
1611 #if !defined (add_ssaaaa)
|
|
/* Portable double-word addition: (sh, sl) = (ah, al) + (bh, bl).
   The carry out of the low words is detected by checking whether the
   low sum wrapped around below AL.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {									\
    UWtype __lo_sum = (al) + (bl);					\
    (sh) = (ah) + (bh) + (__lo_sum < (al));	/* + carry */		\
    (sl) = __lo_sum;							\
  } while (0)
|
|
1619 #endif
|
|
1620
|
|
1621 #if !defined (sub_ddmmss)
|
|
/* Portable double-word subtraction: (sh, sl) = (ah, al) - (bh, bl).
   The borrow out of the low words is detected by checking whether the
   low difference wrapped around above AL.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {									\
    UWtype __lo_diff = (al) - (bl);					\
    (sh) = (ah) - (bh) - (__lo_diff > (al));	/* - borrow */		\
    (sl) = __lo_diff;							\
  } while (0)
|
|
1629 #endif
|
|
1630
|
|
1631 /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
|
|
1632 smul_ppmm. */
|
|
1633 #if !defined (umul_ppmm) && defined (smul_ppmm)
|
|
/* Unsigned full-width multiply built on the signed one: (w1, w0) = u * v.
   smul_ppmm treats an operand with its top bit set as negative; for each
   such operand the other operand is added back into the high word.  The
   low word needs no correction.  */
#define umul_ppmm(w1, w0, u, v) \
  do {									\
    UWtype __mul_u = (u), __mul_v = (v);				\
    UWtype __mul_hi;							\
    /* All-ones mask when the operand's sign bit is set, else zero.  */ \
    UWtype __fix_u = -(__mul_u >> (W_TYPE_SIZE - 1)) & __mul_v;	\
    UWtype __fix_v = -(__mul_v >> (W_TYPE_SIZE - 1)) & __mul_u;	\
    smul_ppmm (__mul_hi, w0, __mul_u, __mul_v);				\
    (w1) = __mul_hi + __fix_u + __fix_v;				\
  } while (0)
|
|
1642 #endif
|
|
1643
|
|
1644 /* If we still don't have umul_ppmm, define it using plain C. */
|
|
1645 #if !defined (umul_ppmm)
|
|
/* Portable full-width unsigned multiply: (w1, w0) = u * v.  The operands
   are split into half-words, the four partial products are formed in
   UWtype and then recombined, propagating the carry out of the middle
   terms into the high product.  */
#define umul_ppmm(w1, w0, u, v) \
  do {									\
    /* Half-word pieces of both operands.  */				\
    UHWtype __u_lo = __ll_lowpart (u);					\
    UHWtype __u_hi = __ll_highpart (u);					\
    UHWtype __v_lo = __ll_lowpart (v);					\
    UHWtype __v_hi = __ll_highpart (v);					\
									\
    /* The four partial products.  */					\
    UWtype __p_ll = (UWtype) __u_lo * __v_lo;				\
    UWtype __p_lh = (UWtype) __u_lo * __v_hi;				\
    UWtype __p_hl = (UWtype) __u_hi * __v_lo;				\
    UWtype __p_hh = (UWtype) __u_hi * __v_hi;				\
									\
    /* This addition cannot carry ...  */				\
    UWtype __mid = __p_lh + __ll_highpart (__p_ll);			\
    /* ... but this one can; a wrapped sum is below its addend.  */	\
    __mid += __p_hl;							\
    __p_hh += (__mid < __p_hl) ? __ll_B : 0;				\
									\
    (w1) = __p_hh + __ll_highpart (__mid);				\
    (w0) = __ll_lowpart (__mid) * __ll_B + __ll_lowpart (__p_ll);	\
  } while (0)
|
|
1669 #endif
|
|
1670
|
|
1671 #if !defined (__umulsidi3)
|
|
/* Multiply U by V and yield the double-word product as a single value.
   umul_ppmm fills the two halves of a DWunion, whose `ll' member is the
   value of the statement expression.  */
#define __umulsidi3(u, v) \
  ({									\
    DWunion __prod_dw;							\
    umul_ppmm (__prod_dw.s.high, __prod_dw.s.low, u, v);		\
    __prod_dw.ll;							\
  })
|
|
1676 #endif
|
|
1677
|
|
1678 /* Define this unconditionally, so it can be used for debugging. */
|
|
/* Portable two-word by one-word division:
   (q, r) = quotient and remainder of (n1, n0) / d.
   The quotient is produced one half-word digit at a time.  Each digit is
   first estimated by dividing by the high half of D and then decremented
   at most twice until the partial remainder covers the estimate's
   contribution from the low half of D.
   NOTE(review): callers are presumably expected to pass a normalized D
   with N1 < D (see UDIV_NEEDS_NORMALIZATION) -- confirm against users.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {									\
    UWtype __dv_hi, __dv_lo, __qt_hi, __qt_lo;				\
    UWtype __rm_hi, __rm_lo, __est;					\
    __dv_hi = __ll_highpart (d);					\
    __dv_lo = __ll_lowpart (d);						\
									\
    /* High quotient digit.  */						\
    __rm_hi = (n1) % __dv_hi;						\
    __qt_hi = (n1) / __dv_hi;						\
    __est = (UWtype) __qt_hi * __dv_lo;					\
    __rm_hi = __rm_hi * __ll_B | __ll_highpart (n0);			\
    if (__rm_hi < __est)						\
      {									\
	__qt_hi--;							\
	__rm_hi += (d);							\
	/* Only retry when the addition above did not carry.  */	\
	if (__rm_hi >= (d) && __rm_hi < __est)				\
	  {								\
	    __qt_hi--;							\
	    __rm_hi += (d);						\
	  }								\
      }									\
    __rm_hi -= __est;							\
									\
    /* Low quotient digit, same procedure on the new remainder.  */	\
    __rm_lo = __rm_hi % __dv_hi;					\
    __qt_lo = __rm_hi / __dv_hi;					\
    __est = (UWtype) __qt_lo * __dv_lo;					\
    __rm_lo = __rm_lo * __ll_B | __ll_lowpart (n0);			\
    if (__rm_lo < __est)						\
      {									\
	__qt_lo--;							\
	__rm_lo += (d);							\
	if (__rm_lo >= (d) && __rm_lo < __est)				\
	  {								\
	    __qt_lo--;							\
	    __rm_lo += (d);						\
	  }								\
      }									\
    __rm_lo -= __est;							\
									\
    (q) = (UWtype) __qt_hi * __ll_B | __qt_lo;				\
    (r) = __rm_lo;							\
  } while (0)
|
|
1715
|
|
/* When the processor provides sdiv_qrnnd but no udiv_qrnnd, implement
   udiv_qrnnd by calling __udiv_w_sdiv (defined in libgcc or elsewhere).  */
#if defined (sdiv_qrnnd) && !defined (udiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d)                                     \
  do {                                                                  \
    extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype);     \
    UWtype __udiv_rem;                                                  \
    /* Return value goes to Q; the word written via &rem goes to R.  */ \
    (q) = __udiv_w_sdiv (&__udiv_rem, nh, nl, d);                       \
    (r) = __udiv_rem;                                                   \
  } while (0)
#endif
|
|
1727
|
|
/* Last resort: with no udiv_qrnnd defined for this processor, route it to
   __udiv_qrnnd_c and record that the division needs normalization.  */
#ifndef udiv_qrnnd
#define udiv_qrnnd __udiv_qrnnd_c
#define UDIV_NEEDS_NORMALIZATION 1
#endif
|
|
1733
|
|
/* Generic count_leading_zeros, used when no earlier definition exists.
   It picks a shift that moves the topmost nonzero chunk of X into the
   index range of __clz_tab (defined elsewhere), then sets COUNT to
   W_TYPE_SIZE minus the sum of the table entry and the shift.  X is
   expanded once.  COUNT_LEADING_ZEROS_0 is defined alongside as
   W_TYPE_SIZE.  */
#ifndef count_leading_zeros
#define count_leading_zeros(count, x)                                   \
  do {                                                                  \
    UWtype __clz_in = (x);                                              \
    UWtype __clz_sh;                                                    \
                                                                        \
    if (W_TYPE_SIZE > 32)                                               \
      {                                                                 \
        /* Wide words: step down a byte at a time to a nonzero one.  */ \
        __clz_sh = W_TYPE_SIZE - 8;                                     \
        while (__clz_sh > 0 && ((__clz_in >> __clz_sh) & 0xff) == 0)    \
          __clz_sh -= 8;                                                \
      }                                                                 \
    else if (__clz_in < ((UWtype)1<<2*__BITS4))                         \
      {                                                                 \
        /* Value lies in the lower two quarters of the word.  */        \
        __clz_sh = __clz_in < ((UWtype)1<<__BITS4) ? 0 : __BITS4;       \
      }                                                                 \
    else                                                                \
      {                                                                 \
        /* Value lies in the upper two quarters of the word.  */        \
        __clz_sh = __clz_in < ((UWtype)1<<3*__BITS4)                    \
                   ? 2*__BITS4 : 3*__BITS4;                             \
      }                                                                 \
                                                                        \
    (count) = W_TYPE_SIZE - (__clz_tab[__clz_in >> __clz_sh] + __clz_sh); \
  } while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
|
|
1757
|
|
#ifndef count_trailing_zeros
/* Build count_trailing_zeros on top of count_leading_zeros.  The latter
   might be defined in asm, but if it is not, the C version above is good
   enough.  X & -X is handed to count_leading_zeros and COUNT is set to
   W_TYPE_SIZE - 1 minus that result.  X is expanded once.  */
#define count_trailing_zeros(count, x)                                  \
  do {                                                                  \
    UWtype __ctz_val = (x);                                             \
    UWtype __ctz_lz;                                                    \
    count_leading_zeros (__ctz_lz, __ctz_val & -__ctz_val);             \
    (count) = W_TYPE_SIZE - 1 - __ctz_lz;                               \
  } while (0)
#endif
|
|
1769
|
|
/* Default UDIV_NEEDS_NORMALIZATION to 0 when nothing earlier defined it.  */
#if !defined (UDIV_NEEDS_NORMALIZATION)
#define UDIV_NEEDS_NORMALIZATION 0
#endif
|