0
|
1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
|
|
2 Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
|
|
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
|
|
4 Free Software Foundation, Inc.
|
|
5
|
|
6 This file is part of the GNU C Library.
|
|
7
|
|
8 The GNU C Library is free software; you can redistribute it and/or
|
|
9 modify it under the terms of the GNU Lesser General Public
|
|
10 License as published by the Free Software Foundation; either
|
|
11 version 2.1 of the License, or (at your option) any later version.
|
|
12
|
|
13 In addition to the permissions in the GNU Lesser General Public
|
|
14 License, the Free Software Foundation gives you unlimited
|
|
15 permission to link the compiled version of this file into
|
|
16 combinations with other programs, and to distribute those
|
|
17 combinations without any restriction coming from the use of this
|
|
18 file. (The Lesser General Public License restrictions do apply in
|
|
19 other respects; for example, they cover modification of the file,
|
|
20 and distribution when not linked into a combine executable.)
|
|
21
|
|
22 The GNU C Library is distributed in the hope that it will be useful,
|
|
23 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
25 Lesser General Public License for more details.
|
|
26
|
|
27 You should have received a copy of the GNU Lesser General Public
|
|
28 License along with the GNU C Library; if not, write to the Free
|
|
29 Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
|
|
30 MA 02110-1301, USA. */
|
|
31
|
|
/* You have to define the following before including this file:

   UWtype -- An unsigned type, default type for operations (typically a "word")
   UHWtype -- An unsigned type, at least half the size of UWtype.
   UDWtype -- An unsigned type, at least twice as large as UWtype
   W_TYPE_SIZE -- size in bits of UWtype

   UQItype -- Unsigned 8 bit type.
   SItype, USItype -- Signed and unsigned 32 bit types.
   DItype, UDItype -- Signed and unsigned 64 bit types.

   On a 32 bit machine UWtype should typically be USItype;
   on a 64 bit machine, UWtype should typically be UDItype.  */
45
|
|
/* Half-word helpers.  They expand lazily, so W_TYPE_SIZE and UWtype only
   need to be defined by the time one of these macros is used.  */

/* A quarter of the word size, in bits.  */
#define __BITS4 (W_TYPE_SIZE / 4)
/* The value 2**(W_TYPE_SIZE/2), i.e. the base of a half-word digit.  */
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
/* Low half of T, as a UWtype.  */
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
/* High half of T, shifted down into the low half of a UWtype.  */
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
50
|
|
/* Default configuration when the includer did not choose a word size:
   32-bit words, with USItype also standing in for the half-word type and
   UDItype for the double-word type.  */
#ifndef W_TYPE_SIZE
#define W_TYPE_SIZE 32
#define UWtype USItype
#define UHWtype USItype
#define UDWtype UDItype
#endif
57
|
|
58 /* Used in glibc only. */
|
|
59 #ifndef attribute_hidden
|
|
60 #define attribute_hidden
|
|
61 #endif
|
|
62
|
|
63 extern const UQItype __clz_tab[256] attribute_hidden;
|
|
64
|
|
65 /* Define auxiliary asm macros.
|
|
66
|
|
67 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
|
|
68 UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
|
|
69 word product in HIGH_PROD and LOW_PROD.
|
|
70
|
|
71 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
|
|
72 UDWtype product. This is just a variant of umul_ppmm.
|
|
73
|
|
74 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
|
|
75 denominator) divides a UDWtype, composed by the UWtype integers
|
|
76 HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
|
|
77 in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
|
|
78 than DENOMINATOR for correct operation. If, in addition, the most
|
|
79 significant bit of DENOMINATOR must be 1, then the pre-processor symbol
|
|
80 UDIV_NEEDS_NORMALIZATION is defined to 1.
|
|
81
|
|
82 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
|
|
83 denominator). Like udiv_qrnnd but the numbers are signed. The quotient
|
|
84 is rounded towards 0.
|
|
85
|
|
86 5) count_leading_zeros(count, x) counts the number of zero-bits from the
|
|
87 msb to the first nonzero bit in the UWtype X. This is the number of
|
|
88 steps X needs to be shifted left to set the msb. Undefined for X == 0,
|
|
89 unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
|
|
90
|
|
91 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
|
|
92 from the least significant end.
|
|
93
|
|
   7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
   high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
   HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
   respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
   (i.e. carry out) is not stored anywhere, and is lost.
99
|
|
   8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
   high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
   composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
   LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
   and LOW_DIFFERENCE.  Overflow (i.e. borrow out) is not stored anywhere,
   and is lost.
106
|
|
107 If any of these macros are left undefined for a particular CPU,
|
|
108 C macros are used. */
|
|
109
|
|
110 /* The CPUs come in alphabetical order below.
|
|
111
|
|
112 Please add support for more CPUs here, or improve the current support
|
|
113 for the CPUs below!
|
|
114 (E.g. WE32100, IBM360.) */
|
|
115
|
|
116 #if defined (__GNUC__) && !defined (NO_ASM)
|
|
117
|
|
/* We sometimes need to clobber "cc" with gcc2, but that would not be
   understood by gcc1.  Use cpp to avoid major code duplication.

   __CLOBBER_CC opens a clobber list (it supplies the leading colon);
   __AND_CLOBBER_CC appends to a clobber list that is already open.  */
#if __GNUC__ < 2
#define __CLOBBER_CC
#define __AND_CLOBBER_CC
#else /* __GNUC__ >= 2 */
#define __CLOBBER_CC : "cc"
#define __AND_CLOBBER_CC , "cc"
#endif /* __GNUC__ < 2 */
127
|
|
/* DEC Alpha, 64-bit words.  */
#if defined (__alpha) && W_TYPE_SIZE == 64
/* The high word comes from the umulh builtin, the low word from the
   ordinary (truncating) product of the latched operands.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDItype __m0 = (m0), __m1 = (m1); \
    (ph) = __builtin_alpha_umulh (__m0, __m1); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 46
#ifndef LONGLONG_STANDALONE
/* Division is delegated to the out-of-line __udiv_qrnnd, which returns
   the quotient and stores the remainder through its first argument.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UDItype __r; \
    (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
    (r) = __r; \
  } while (0)
extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
#define UDIV_TIME 220
#endif /* LONGLONG_STANDALONE */
#ifdef __alpha_cix__
/* With the CIX extension, use the compiler builtins directly.  */
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
#define COUNT_LEADING_ZEROS_0 64
#else
/* Without CIX: cmpbge against zero produces a per-byte mask, __clz_tab
   turns that mask into a byte index __a, extbl pulls that byte out, and
   a second __clz_tab lookup finishes the count within the byte.  */
#define count_leading_zeros(COUNT,X) \
  do { \
    UDItype __xr = (X), __t, __a; \
    __t = __builtin_alpha_cmpbge (0, __xr); \
    __a = __clz_tab[__t ^ 0xff] - 1; \
    __t = __builtin_alpha_extbl (__xr, __a); \
    (COUNT) = 64 - (__clz_tab[__t] + __a*8); \
  } while (0)
/* Same two-level scheme from the low end: isolate the lowest set bit of
   the byte mask, convert it to a byte index with three mask tests, then
   repeat the bit-isolation and mask tests inside the extracted byte.  */
#define count_trailing_zeros(COUNT,X) \
  do { \
    UDItype __xr = (X), __t, __a; \
    __t = __builtin_alpha_cmpbge (0, __xr); \
    __t = ~__t & -~__t; \
    __a = ((__t & 0xCC) != 0) * 2; \
    __a += ((__t & 0xF0) != 0) * 4; \
    __a += ((__t & 0xAA) != 0); \
    __t = __builtin_alpha_extbl (__xr, __a); \
    __a <<= 3; \
    __t &= -__t; \
    __a += ((__t & 0xCC) != 0) * 2; \
    __a += ((__t & 0xF0) != 0) * 4; \
    __a += ((__t & 0xAA) != 0); \
    (COUNT) = __a; \
  } while (0)
#endif /* __alpha_cix__ */
#endif /* __alpha */
176
|
|
/* ARC, 32-bit words.  */
#if defined (__arc__) && W_TYPE_SIZE == 32
/* Low words are added first with the flag-setting form; the high words
   are then added with carry.  SL is earlyclobber because it is written
   before the high-word inputs are read.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add.f %1, %4, %5\n\tadc %0, %2, %3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%r" ((USItype) (ah)), \
             "rIJ" ((USItype) (bh)), \
             "%r" ((USItype) (al)), \
             "rIJ" ((USItype) (bl)))
/* Two-word subtract, same operand layout as add_ssaaaa.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub.f %1, %4, %5\n\tsbc %0, %2, %3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "r" ((USItype) (ah)), \
             "rIJ" ((USItype) (bh)), \
             "r" ((USItype) (al)), \
             "rIJ" ((USItype) (bl)))
/* Call libgcc routine.  The 64-bit result is split through DWunion
   (declared by the includer).  The output arguments are parenthesised so
   that any lvalue expression may be passed for W1 and W0.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    DWunion __w; \
    __w.ll = __umulsidi3 ((u), (v)); \
    (w1) = __w.s.high; \
    (w0) = __w.s.low; \
  } while (0)
#define __umulsidi3 __umulsidi3
UDItype __umulsidi3 (USItype, USItype);
#endif
205
|
|
/* ARM (not Thumb), 32-bit words.  */
#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
/* Two-word add: flag-setting add of the low words, then add-with-carry
   of the high words.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("adds %1, %4, %5\n\tadc %0, %2, %3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%r" ((USItype) (ah)), \
             "rI" ((USItype) (bh)), \
             "%r" ((USItype) (al)), \
             "rI" ((USItype) (bl)) __CLOBBER_CC)
/* Two-word subtract, same operand layout as add_ssaaaa.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subs %1, %4, %5\n\tsbc %0, %2, %3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "r" ((USItype) (ah)), \
             "rI" ((USItype) (bh)), \
             "r" ((USItype) (al)), \
             "rI" ((USItype) (bl)) __CLOBBER_CC)
/* 32x32->64 multiply built from four 16x16 products.  Both operands are
   split into 16-bit halves, the partial products are formed with MUL,
   and the two cross products are folded into the result with carry.

   The body is wrapped in do { } while (0) rather than bare braces so
   that "if (c) umul_ppmm (...); else ..." parses as one statement.  */
#define umul_ppmm(xh, xl, a, b) \
  do { \
    register USItype __t0, __t1, __t2; \
    __asm__ ("%@ Inlined umul_ppmm\n" \
             " mov %2, %5, lsr #16\n" \
             " mov %0, %6, lsr #16\n" \
             " bic %3, %5, %2, lsl #16\n" \
             " bic %4, %6, %0, lsl #16\n" \
             " mul %1, %3, %4\n" \
             " mul %4, %2, %4\n" \
             " mul %3, %0, %3\n" \
             " mul %0, %2, %0\n" \
             " adds %3, %4, %3\n" \
             " addcs %0, %0, #65536\n" \
             " adds %1, %1, %3, lsl #16\n" \
             " adc %0, %0, %3, lsr #16" \
             : "=&r" ((USItype) (xh)), \
               "=r" ((USItype) (xl)), \
               "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \
             : "r" ((USItype) (a)), \
               "r" ((USItype) (b)) __CLOBBER_CC ); \
  } while (0)
#define UMUL_TIME 20
#define UDIV_TIME 100
#endif /* __arm__ */
246
|
|
#if defined(__arm__)
/* Let gcc decide how best to implement count_leading_zeros.  Note that
   this definition applies to every ARM configuration, Thumb included.  */
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif
252
|
|
/* CRIS: the leading-zero builtin is used from architecture version 3,
   the trailing-zero builtin only from version 8.  */
#if defined (__CRIS__) && __CRIS_arch_version >= 3
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
#if __CRIS_arch_version >= 8
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
#endif
#endif /* __CRIS__ */
259
|
|
/* HP PA-RISC, 32-bit words.  */
#if defined (__hppa) && W_TYPE_SIZE == 32
/* Two-word add: low words first, then the high words with carry.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%rM" ((USItype) (ah)), \
             "rM" ((USItype) (bh)), \
             "%rM" ((USItype) (al)), \
             "rM" ((USItype) (bl)))
/* Two-word subtract: low words first, then the high words with borrow.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "rM" ((USItype) (ah)), \
             "rM" ((USItype) (bh)), \
             "rM" ((USItype) (al)), \
             "rM" ((USItype) (bl)))
#if defined (_PA_RISC1_1)
/* xmpyu writes a 64-bit product into __t.__f; the union then exposes
   it as two 32-bit words, __w1 first.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    union \
      { \
        UDItype __f; \
        struct {USItype __w1, __w0;} __w1w0; \
      } __t; \
    __asm__ ("xmpyu %1,%2,%0" \
             : "=x" (__t.__f) \
             : "x" ((USItype) (u)), \
               "x" ((USItype) (v))); \
    (w1) = __t.__w1w0.__w1; \
    (w0) = __t.__w1w0.__w0; \
  } while (0)
#define UMUL_TIME 8
#else
#define UMUL_TIME 30
#endif
#define UDIV_TIME 40
/* Binary search for the highest set bit: test the top 16, 8, 4 and 2
   bits in turn, either shifting the value down or adding the width of
   the empty field to the count.  __tmp is a scratch copy of X.  */
#define count_leading_zeros(count, x) \
  do { \
    USItype __tmp; \
    __asm__ ( \
       "ldi 1,%0\n" \
" extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \
" extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n" \
" ldo 16(%0),%0 ; Yes. Perform add.\n" \
" extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \
" extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n" \
" ldo 8(%0),%0 ; Yes. Perform add.\n" \
" extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \
" extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n" \
" ldo 4(%0),%0 ; Yes. Perform add.\n" \
" extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \
" extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n" \
" ldo 2(%0),%0 ; Yes. Perform add.\n" \
" extru %1,30,1,%1 ; Extract bit 1.\n" \
" sub %0,%1,%0 ; Subtract it.\n" \
        : "=r" (count), "=r" (__tmp) : "1" (x)); \
  } while (0)
#endif
319
|
|
/* IBM 370 / S/390 / MVS, 32-bit words.  */
#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
/* Signed 32x32->64 multiply.  The result occupies a 64-bit operand,
   viewed through the union as high word __h and low word __l.  */
#define smul_ppmm(xh, xl, m0, m1) \
  do { \
    union {DItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __asm__ ("lr %N0,%1\n\tmr %0,%2" \
             : "=&r" (__x.__ll) \
             : "r" (m0), "r" (m1)); \
    (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
  } while (0)
/* Signed 64/32 divide.  N1:N0 is loaded into the 64-bit operand; after
   "dr" the low word holds the quotient and the high word the remainder.
   N1 and N0 are parenthesised so that arbitrary expressions (for example
   a conditional expression) can be passed safely.  */
#define sdiv_qrnnd(q, r, n1, n0, d) \
  do { \
    union {DItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __x.__i.__h = (n1); __x.__i.__l = (n0); \
    __asm__ ("dr %0,%2" \
             : "=r" (__x.__ll) \
             : "0" (__x.__ll), "r" (d)); \
    (q) = __x.__i.__l; (r) = __x.__i.__h; \
  } while (0)
#endif
343
|
|
/* Intel 80x86, 32-bit words.  The {att|intel} braces in the templates
   select the operand order for the assembler dialect in use.  */
#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
/* Two-word add: AH/AL are tied to the outputs, BH/BL are added in.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%0" ((USItype) (ah)), \
             "g" ((USItype) (bh)), \
             "%1" ((USItype) (al)), \
             "g" ((USItype) (bl)))
/* Two-word subtract with borrow propagated by sbb.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "g" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "g" ((USItype) (bl)))
/* One-operand mul: U starts in the "a" register, the product's low word
   comes back in "a" and the high word in "d".  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mul{l} %3" \
           : "=a" ((USItype) (w0)), \
             "=d" ((USItype) (w1)) \
           : "%0" ((USItype) (u)), \
             "rm" ((USItype) (v)))
/* One-operand div: N1:N0 is supplied in "d":"a"; the quotient comes
   back in "a" and the remainder in "d".  */
#define udiv_qrnnd(q, r, n1, n0, dv) \
  __asm__ ("div{l} %4" \
           : "=a" ((USItype) (q)), \
             "=d" ((USItype) (r)) \
           : "0" ((USItype) (n0)), \
             "1" ((USItype) (n1)), \
             "rm" ((USItype) (dv)))
#define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
#define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
#define UMUL_TIME 40
#define UDIV_TIME 40
#endif /* 80x86 */
379
|
|
/* x86 with 64-bit words.  Same structure as the 32-bit 80x86 macros,
   using the "q" instruction suffix and UDItype operands.  */
#if (defined (__x86_64__) || defined (__i386__)) && W_TYPE_SIZE == 64
/* Two-word add: AH/AL are tied to the outputs, BH/BL are added in.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}" \
           : "=r" ((UDItype) (sh)), \
             "=&r" ((UDItype) (sl)) \
           : "%0" ((UDItype) (ah)), \
             "rme" ((UDItype) (bh)), \
             "%1" ((UDItype) (al)), \
             "rme" ((UDItype) (bl)))
/* Two-word subtract with borrow propagated by sbb.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}" \
           : "=r" ((UDItype) (sh)), \
             "=&r" ((UDItype) (sl)) \
           : "0" ((UDItype) (ah)), \
             "rme" ((UDItype) (bh)), \
             "1" ((UDItype) (al)), \
             "rme" ((UDItype) (bl)))
/* One-operand mul: low word of the product in "a", high word in "d".  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mul{q} %3" \
           : "=a" ((UDItype) (w0)), \
             "=d" ((UDItype) (w1)) \
           : "%0" ((UDItype) (u)), \
             "rm" ((UDItype) (v)))
/* One-operand div: N1:N0 in "d":"a"; quotient in "a", remainder in "d".  */
#define udiv_qrnnd(q, r, n1, n0, dv) \
  __asm__ ("div{q} %4" \
           : "=a" ((UDItype) (q)), \
             "=d" ((UDItype) (r)) \
           : "0" ((UDItype) (n0)), \
             "1" ((UDItype) (n1)), \
             "rm" ((UDItype) (dv)))
#define count_leading_zeros(count, x) ((count) = __builtin_clzl (x))
#define count_trailing_zeros(count, x) ((count) = __builtin_ctzl (x))
#define UMUL_TIME 40
#define UDIV_TIME 40
#endif /* x86_64 */
415
|
|
/* Intel i960, 32-bit words.  */
#if defined (__i960__) && W_TYPE_SIZE == 32
/* emul produces a 64-bit product; the union exposes it as low word __l
   followed by high word __h.  */
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll; \
           struct {USItype __l, __h;} __i; \
          } __xx; \
  __asm__ ("emul %2,%1,%0" \
           : "=d" (__xx.__ll) \
           : "%dI" ((USItype) (u)), \
             "dI" ((USItype) (v))); \
  (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
/* Same instruction, returning the whole product as a UDItype value.  */
#define __umulsidi3(u, v) \
  ({UDItype __w; \
    __asm__ ("emul %2,%1,%0" \
             : "=d" (__w) \
             : "%dI" ((USItype) (u)), \
               "dI" ((USItype) (v))); \
    __w; })
#endif /* __i960__ */
434
|
|
/* Intel IA-64, 64-bit words.  */
#if defined (__ia64) && W_TYPE_SIZE == 64
/* This form encourages gcc (pre-release 3.4 at least) to emit predicated
   "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
   code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
   register, which takes an extra cycle.

   The low difference is computed into __x first and stored last, so SL
   may name the same object as AL or BL.  Note that AL and BL are each
   evaluated twice.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __x; \
    __x = (al) - (bl); \
    if ((al) < (bl)) \
      (sh) = (ah) - (bh) - 1; \
    else \
      (sh) = (ah) - (bh); \
    (sl) = __x; \
  } while (0)

/* Do both product parts in assembly, since that gives better code with
   all gcc versions.  Some callers will just use the upper part, and in
   that situation we waste an instruction, but not any cycles.  */
#define umul_ppmm(ph, pl, m0, m1) \
  __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \
           : "=&f" (ph), "=f" (pl) \
           : "f" (m0), "f" (m1))
/* Two asm steps yield _a, from which (_a - 1) * 8 is used as a byte-sized
   shift; the remaining bits within that byte are resolved with plain C
   comparisons against 1 << 4 and 1 << 2.  */
#define count_leading_zeros(count, x) \
  do { \
    UWtype _x = (x), _y, _a, _c; \
    __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \
    __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \
    _c = (_a - 1) << 3; \
    _x >>= _c; \
    if (_x >= 1 << 4) \
      _x >>= 4, _c += 4; \
    if (_x >= 1 << 2) \
      _x >>= 2, _c += 2; \
    _c += _x >> 1; \
    (count) = W_TYPE_SIZE - 1 - _c; \
  } while (0)
/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
   based, and we don't need a special case for x==0 here.
   (x-1) & ~x has exactly the trailing-zero positions of x set, so its
   population count is the answer.  */
#define count_trailing_zeros(count, x) \
  do { \
    UWtype __ctz_x = (x); \
    __asm__ ("popcnt %0 = %1" \
             : "=r" (count) \
             : "r" ((__ctz_x-1) & ~__ctz_x)); \
  } while (0)
#define UMUL_TIME 14
#endif
483
|
|
/* Renesas M32R, 32-bit words.  Both macros tie AH/AL to the outputs and
   declare the condition bit ("cbit") as clobbered.  */
#if defined (__M32R__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  /* The cmp clears the condition bit.  */ \
  __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "r" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "r" ((USItype) (bl)) \
           : "cbit")
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  /* The cmp clears the condition bit.  */ \
  __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "r" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "r" ((USItype) (bl)) \
           : "cbit")
#endif /* __M32R__ */
506
|
|
/* Motorola 68000 family, 32-bit words.  */
#if defined (__mc68000__) && W_TYPE_SIZE == 32
/* Two-word add: add the low words, then addx the high words.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \
           : "=d" ((USItype) (sh)), \
             "=&d" ((USItype) (sl)) \
           : "%0" ((USItype) (ah)), \
             "d" ((USItype) (bh)), \
             "%1" ((USItype) (al)), \
             "g" ((USItype) (bl)))
/* Two-word subtract: subtract the low words, then subx the high words.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \
           : "=d" ((USItype) (sh)), \
             "=&d" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "d" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "g" ((USItype) (bl)))

/* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r.
   The '060 is deliberately excluded by the test below and falls through
   to the generic m68k sequence further down.  */
#if (defined (__mc68020__) && !defined (__mc68060__))
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mulu%.l %3,%1:%0" \
           : "=d" ((USItype) (w0)), \
             "=d" ((USItype) (w1)) \
           : "%0" ((USItype) (u)), \
             "dmi" ((USItype) (v)))
#define UMUL_TIME 45
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("divu%.l %4,%1:%0" \
           : "=d" ((USItype) (q)), \
             "=d" ((USItype) (r)) \
           : "0" ((USItype) (n0)), \
             "1" ((USItype) (n1)), \
             "dmi" ((USItype) (d)))
#define UDIV_TIME 90
#define sdiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("divs%.l %4,%1:%0" \
           : "=d" ((USItype) (q)), \
             "=d" ((USItype) (r)) \
           : "0" ((USItype) (n0)), \
             "1" ((USItype) (n1)), \
             "dmi" ((USItype) (d)))

#elif defined (__mcoldfire__) /* not mc68020 */

/* ColdFire: 32x32->64 from four 16x16 mulu products, using d0-d4 as
   scratch (all listed as clobbers).  Identical to the generic sequence
   below except that the low half of d0 is cleared with clr.w.  */
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("| Inlined umul_ppmm\n" \
           " move%.l %2,%/d0\n" \
           " move%.l %3,%/d1\n" \
           " move%.l %/d0,%/d2\n" \
           " swap %/d0\n" \
           " move%.l %/d1,%/d3\n" \
           " swap %/d1\n" \
           " move%.w %/d2,%/d4\n" \
           " mulu %/d3,%/d4\n" \
           " mulu %/d1,%/d2\n" \
           " mulu %/d0,%/d3\n" \
           " mulu %/d0,%/d1\n" \
           " move%.l %/d4,%/d0\n" \
           " clr%.w %/d0\n" \
           " swap %/d0\n" \
           " add%.l %/d0,%/d2\n" \
           " add%.l %/d3,%/d2\n" \
           " jcc 1f\n" \
           " add%.l %#65536,%/d1\n" \
           "1: swap %/d2\n" \
           " moveq %#0,%/d0\n" \
           " move%.w %/d2,%/d0\n" \
           " move%.w %/d4,%/d2\n" \
           " move%.l %/d2,%1\n" \
           " add%.l %/d1,%/d0\n" \
           " move%.l %/d0,%0" \
           : "=g" ((USItype) (xh)), \
             "=g" ((USItype) (xl)) \
           : "g" ((USItype) (a)), \
             "g" ((USItype) (b)) \
           : "d0", "d1", "d2", "d3", "d4")
#define UMUL_TIME 100
#define UDIV_TIME 400
#else /* not ColdFire */
/* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
/* Generic m68k: the same four-product sequence, clearing the low half
   of d0 with eor.w.  */
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("| Inlined umul_ppmm\n" \
           " move%.l %2,%/d0\n" \
           " move%.l %3,%/d1\n" \
           " move%.l %/d0,%/d2\n" \
           " swap %/d0\n" \
           " move%.l %/d1,%/d3\n" \
           " swap %/d1\n" \
           " move%.w %/d2,%/d4\n" \
           " mulu %/d3,%/d4\n" \
           " mulu %/d1,%/d2\n" \
           " mulu %/d0,%/d3\n" \
           " mulu %/d0,%/d1\n" \
           " move%.l %/d4,%/d0\n" \
           " eor%.w %/d0,%/d0\n" \
           " swap %/d0\n" \
           " add%.l %/d0,%/d2\n" \
           " add%.l %/d3,%/d2\n" \
           " jcc 1f\n" \
           " add%.l %#65536,%/d1\n" \
           "1: swap %/d2\n" \
           " moveq %#0,%/d0\n" \
           " move%.w %/d2,%/d0\n" \
           " move%.w %/d4,%/d2\n" \
           " move%.l %/d2,%1\n" \
           " add%.l %/d1,%/d0\n" \
           " move%.l %/d0,%0" \
           : "=g" ((USItype) (xh)), \
             "=g" ((USItype) (xl)) \
           : "g" ((USItype) (a)), \
             "g" ((USItype) (b)) \
           : "d0", "d1", "d2", "d3", "d4")
#define UMUL_TIME 100
#define UDIV_TIME 400

#endif /* not mc68020 */

/* The '020, '030, '040 and '060 have bitfield insns.
   cpu32 disguises as a 68020, but lacks them.  */
#if defined (__mc68020__) && !defined (__mcpu32__)
#define count_leading_zeros(count, x) \
  __asm__ ("bfffo %1{%b2:%b2},%0" \
           : "=d" ((USItype) (count)) \
           : "od" ((USItype) (x)), "n" (0))
/* Some ColdFire architectures have a ff1 instruction supported via
   __builtin_clz.  */
#elif defined (__mcfisaaplus__) || defined (__mcfisac__)
#define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
#define COUNT_LEADING_ZEROS_0 32
#endif
#endif /* mc68000 */
639
|
|
/* Motorola 88000, 32-bit words.  */
#if defined (__m88000__) && W_TYPE_SIZE == 32
/* Two-word add: addu.co on the low words, addu.ci on the high words.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%rJ" ((USItype) (ah)), \
             "rJ" ((USItype) (bh)), \
             "%rJ" ((USItype) (al)), \
             "rJ" ((USItype) (bl)))
/* Two-word subtract: subu.co on the low words, subu.ci on the high.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "rJ" ((USItype) (ah)), \
             "rJ" ((USItype) (bh)), \
             "rJ" ((USItype) (al)), \
             "rJ" ((USItype) (bl)))
/* The ff1 result is XORed with 31 to turn it into a leading-zero
   count.  */
#define count_leading_zeros(count, x) \
  do { \
    USItype __cbtmp; \
    __asm__ ("ff1 %0,%1" \
             : "=r" (__cbtmp) \
             : "r" ((USItype) (x))); \
    (count) = __cbtmp ^ 31; \
  } while (0)
#define COUNT_LEADING_ZEROS_0 63 /* sic */
#if defined (__mc88110__)
/* mulu.d writes a 64-bit product, viewed through the union as high
   word __h followed by low word __l.  */
#define umul_ppmm(wh, wl, u, v) \
  do { \
    union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __xx; \
    __asm__ ("mulu.d %0,%1,%2" \
             : "=r" (__xx.__ll) \
             : "r" ((USItype) (u)), \
               "r" ((USItype) (v))); \
    (wh) = __xx.__i.__h; \
    (wl) = __xx.__i.__l; \
  } while (0)
/* divu.d yields only the quotient; the remainder is reconstructed as
   n0 - q * d in wrapping 32-bit arithmetic.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __xx; \
  USItype __q; \
  __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
  __asm__ ("divu.d %0,%1,%2" \
           : "=r" (__q) \
           : "r" (__xx.__ll), \
             "r" ((USItype) (d))); \
  (r) = (n0) - __q * (d); (q) = __q; })
#define UMUL_TIME 5
#define UDIV_TIME 25
#else
#define UMUL_TIME 17
#define UDIV_TIME 150
#endif /* __mc88110__ */
#endif /* __m88000__ */
697
|
|
/* MIPS, 32-bit words.  */
#if defined (__mips__) && W_TYPE_SIZE == 32
/* Plain C: widen to 64 bits, multiply, and split the product.  The
   compiler is left to pick the multiply instruction.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UDItype __x = (UDItype) (USItype) (u) * (USItype) (v); \
    (w1) = (USItype) (__x >> 32); \
    (w0) = (USItype) (__x); \
  } while (0)
#define UMUL_TIME 10
#define UDIV_TIME 100

/* Use the clz builtin only for the mips32/mips64 ISA levels, and never
   in MIPS16 mode.  */
#if (__mips == 32 || __mips == 64) && ! __mips16
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif
#endif /* __mips__ */
713
|
|
/* National Semiconductor 32000, 32-bit words.  */
#if defined (__ns32000__) && W_TYPE_SIZE == 32
/* meid leaves a 64-bit product in the operand tied to U; the union
   exposes it as low word __l followed by high word __h.  */
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll; \
           struct {USItype __l, __h;} __i; \
          } __xx; \
  __asm__ ("meid %2,%0" \
           : "=g" (__xx.__ll) \
           : "%0" ((USItype) (u)), \
             "g" ((USItype) (v))); \
  (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
/* Same instruction, returning the whole product as a UDItype value.  */
#define __umulsidi3(u, v) \
  ({UDItype __w; \
    __asm__ ("meid %2,%0" \
             : "=g" (__w) \
             : "%0" ((USItype) (u)), \
               "g" ((USItype) (v))); \
    __w; })
/* deid divides the 64-bit operand in place; afterwards the low word is
   read back as the remainder and the high word as the quotient.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union {UDItype __ll; \
           struct {USItype __l, __h;} __i; \
          } __xx; \
  __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
  __asm__ ("deid %2,%0" \
           : "=g" (__xx.__ll) \
           : "0" (__xx.__ll), \
             "g" ((USItype) (d))); \
  (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
/* ffsd is given COUNT preloaded with 0 through the tied "0" operand.  */
#define count_trailing_zeros(count,x) \
  do { \
    __asm__ ("ffsd %2,%0" \
             : "=r" ((USItype) (count)) \
             : "0" ((USItype) 0), \
               "r" ((USItype) (x))); \
  } while (0)
#endif /* __ns32000__ */
749
|
|
750 /* FIXME: We should test _IBMR2 here when we add assembly support for the
|
|
751 system vendor compilers.
|
|
752 FIXME: What's needed for gcc PowerPC VxWorks? __vxworks__ is not good
|
|
753 enough, since that hits ARM and m68k too. */
|
|
754 #if (defined (_ARCH_PPC) /* AIX */ \
|
|
755 || defined (_ARCH_PWR) /* AIX */ \
|
|
756 || defined (_ARCH_COM) /* AIX */ \
|
|
757 || defined (__powerpc__) /* gcc */ \
|
|
758 || defined (__POWERPC__) /* BEOS */ \
|
|
759 || defined (__ppc__) /* Darwin */ \
|
|
760 || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
|
|
761 || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
|
|
762 && CPU_FAMILY == PPC) \
|
|
763 ) && W_TYPE_SIZE == 32
|
|
764 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
|
765 do { \
|
|
766 if (__builtin_constant_p (bh) && (bh) == 0) \
|
|
767 __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
|
|
768 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
|
|
769 else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
|
|
770 __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
|
|
771 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
|
|
772 else \
|
|
773 __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
|
|
774 : "=r" (sh), "=&r" (sl) \
|
|
775 : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
|
|
776 } while (0)
|
|
777 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
|
778 do { \
|
|
779 if (__builtin_constant_p (ah) && (ah) == 0) \
|
|
780 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
|
|
781 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
|
|
782 else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
|
|
783 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
|
|
784 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
|
|
785 else if (__builtin_constant_p (bh) && (bh) == 0) \
|
|
786 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
|
|
787 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
|
|
788 else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
|
|
789 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
|
|
790 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
|
|
791 else \
|
|
792 __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
|
|
793 : "=r" (sh), "=&r" (sl) \
|
|
794 : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
|
|
795 } while (0)
|
|
796 #define count_leading_zeros(count, x) \
|
|
797 __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
|
|
798 #define COUNT_LEADING_ZEROS_0 32
|
|
799 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
|
|
800 || defined (__ppc__) \
|
|
801 || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
|
|
802 || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
|
|
803 && CPU_FAMILY == PPC)
|
|
804 #define umul_ppmm(ph, pl, m0, m1) \
|
|
805 do { \
|
|
806 USItype __m0 = (m0), __m1 = (m1); \
|
|
807 __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
|
|
808 (pl) = __m0 * __m1; \
|
|
809 } while (0)
|
|
810 #define UMUL_TIME 15
|
|
811 #define smul_ppmm(ph, pl, m0, m1) \
|
|
812 do { \
|
|
813 SItype __m0 = (m0), __m1 = (m1); \
|
|
814 __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
|
|
815 (pl) = __m0 * __m1; \
|
|
816 } while (0)
|
|
817 #define SMUL_TIME 14
|
|
818 #define UDIV_TIME 120
|
|
819 #elif defined (_ARCH_PWR)
|
|
#define UMUL_TIME 8
/* Signed multiply of M0 by M1 with the POWER "mul" instruction.  XH is
   the instruction's register result; XL is delivered through the "q"
   constraint (presumably the MQ register -- confirm against the rs6000
   machine-constraint documentation).  */
#define smul_ppmm(xh, xl, m0, m1) \
  __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
#define SMUL_TIME 4
|
|
/* Signed divide of the two-word value (NH,NL) by D with the POWER "div"
   instruction.  Q is the register result; NL enters, and R leaves,
   through the "q"-constrained operand (the "1" constraint ties NL to
   operand 1).  */
#define sdiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
#define UDIV_TIME 100
|
|
827 #endif
|
|
828 #endif /* 32-bit POWER architecture variants. */
|
|
829
|
|
830 /* We should test _IBMR2 here when we add assembly support for the system
|
|
831 vendor compilers. */
|
|
832 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
|
|
/* Two-word addition (SH,SL) = (AH,AL) + (BH,BL) for 64-bit PowerPC.
   The low words are added with a carry-producing add; the high word is
   then formed with a carry-consuming instruction.  When BH is a
   compile-time constant 0 or all-ones, the shorter addze / addme forms
   are used so BH needs no register.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
    else \
      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
               : "=r" (sh), "=&r" (sl) \
               : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
  } while (0)
|
|
/* Two-word subtraction (SH,SL) = (AH,AL) - (BH,BL) for 64-bit PowerPC.
   The low words are subtracted with a carry-producing subtract-from;
   the high word is then formed with a carry-consuming instruction.
   Compile-time constant 0 or all-ones values of AH or BH select shorter
   one-source forms (subfze / subfme / addme / addze).  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (ah) && (ah) == 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \
    else \
      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
  } while (0)
|
|
/* Store in COUNT the number of leading zero bits of the 64-bit word X,
   using a single cntlzd instruction.  */
#define count_leading_zeros(count, x) \
  __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
/* Result of count_leading_zeros for a zero argument.  */
#define COUNT_LEADING_ZEROS_0 64
|
|
/* Unsigned multiply of M0 by M1: PH receives the high word (mulhdu) and
   PL the low word (ordinary C multiplication).  The asm reads the
   temporaries rather than the raw macro arguments, so M0 and M1 are
   evaluated exactly once.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 15
|
|
/* Signed multiply of M0 by M1: PH receives the high word (mulhd) and PL
   the low word (ordinary C multiplication).  The asm reads the
   temporaries rather than the raw macro arguments, so M0 and M1 are
   evaluated exactly once.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do { \
    DItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define SMUL_TIME 14 /* ??? */
#define UDIV_TIME 120 /* ??? */
|
|
883 #endif /* 64-bit PowerPC. */
|
|
884
|
|
885 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
|
|
/* Two-word addition (SH,SL) = (AH,AL) + (BH,BL) for RT/ROMP.  AH and AL
   are tied to the output registers ("0" and "1"), so the two
   instructions accumulate BL and then BH into them in place.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("a %1,%5\n\tae %0,%3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%0" ((USItype) (ah)), \
             "r" ((USItype) (bh)), \
             "%1" ((USItype) (al)), \
             "r" ((USItype) (bl)))
|
|
/* Two-word subtraction (SH,SL) = (AH,AL) - (BH,BL) for RT/ROMP.  AH and
   AL are tied to the output registers ("0" and "1"), so the two
   instructions subtract BL and then BH from them in place.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("s %1,%5\n\tse %0,%3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "r" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "r" ((USItype) (bl)))
|
|
/* Unsigned multiply of M0 by M1 into (PH,PL) for RT/ROMP.  r2 is
   cleared, one factor is moved into r10 with mts, and sixteen "m" steps
   against the other factor follow; the halves are then read back with
   cas and mfs.  The trailing C statement adds a correction to PH that
   depends on the sign bits of the operands (the same fix-up used when an
   unsigned product is derived from a signed one).  r2 is declared
   clobbered.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ( \
      "s r2,r2\n" \
      " mts r10,%2\n" \
      " m r2,%3\n" \
      " m r2,%3\n" \
      " m r2,%3\n" \
      " m r2,%3\n" \
      " m r2,%3\n" \
      " m r2,%3\n" \
      " m r2,%3\n" \
      " m r2,%3\n" \
      " m r2,%3\n" \
      " m r2,%3\n" \
      " m r2,%3\n" \
      " m r2,%3\n" \
      " m r2,%3\n" \
      " m r2,%3\n" \
      " m r2,%3\n" \
      " m r2,%3\n" \
      " cas %0,r2,r0\n" \
      " mfs r10,%1" \
      : "=r" ((USItype) (ph)), \
        "=r" ((USItype) (pl)) \
      : "%r" (__m0), \
        "r" (__m1) \
      : "r2"); \
    (ph) += ((((SItype) __m0 >> 31) & __m1) \
             + (((SItype) __m1 >> 31) & __m0)); \
  } while (0)
#define UMUL_TIME 20
#define UDIV_TIME 200
|
|
/* Store in COUNT the number of leading zero bits of X for RT/ROMP.  The
   clz instruction is applied to one 16-bit half at a time: to the upper
   half (X >> 16) when X >= 0x10000, otherwise to X itself with 16 added
   to the result.  Note that X is expanded more than once.  */
#define count_leading_zeros(count, x) \
  do { \
    if ((x) >= 0x10000) \
      __asm__ ("clz %0,%1" \
               : "=r" ((USItype) (count)) \
               : "r" ((USItype) (x) >> 16)); \
    else \
      { \
        __asm__ ("clz %0,%1" \
                 : "=r" ((USItype) (count)) \
                 : "r" ((USItype) (x))); \
        (count) += 16; \
      } \
  } while (0)
|
|
950 #endif
|
|
951
|
|
952 #if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
|
|
953 #ifndef __sh1__
|
|
/* Unsigned multiply of U by V into (W1,W0) for SH: dmulu.l leaves the
   product in MACH:MACL, which are then stored to W1 and W0 with sts.
   Both MAC registers are declared clobbered.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ( \
    "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0 mach,%0" \
    : "=r<" ((USItype)(w1)), \
      "=r<" ((USItype)(w0)) \
    : "r" ((USItype)(u)), \
      "r" ((USItype)(v)) \
    : "macl", "mach")
#define UMUL_TIME 5
|
|
963 #endif
|
|
964
|
|
/* This is the same algorithm as __udiv_qrnnd_c.  */
#define UDIV_NEEDS_NORMALIZATION 1

/* Divide the two-word value (N1,N0) by D, giving quotient Q and
   remainder R, by calling the hidden helper __udiv_qrnnd_16 twice (once
   per 16-bit quotient half).  The helper is reached with jsr from inside
   the asm, so the registers it uses (see the register map below) and the
   return-address register "pr" are listed as clobbers; the T bit is
   listed as well because it does not survive a call.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { \
    extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
      __attribute__ ((visibility ("hidden"))); \
    /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
    __asm__ ( \
      "mov%M4 %4,r5\n" \
      " swap.w %3,r4\n" \
      " swap.w r5,r6\n" \
      " jsr @%5\n" \
      " shll16 r6\n" \
      " swap.w r4,r4\n" \
      " jsr @%5\n" \
      " swap.w r1,%0\n" \
      " or r1,%0" \
      : "=r" (q), "=&z" (r) \
      : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
      : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
  } while (0)

#define UDIV_TIME 80
|
|
989
|
|
/* Two-word subtraction (SH,SL) = (AH,AL) - (BH,BL) for SH.  clrt clears
   the T bit, then two subc instructions subtract with borrow through T.
   Because the sequence writes T, it is declared as a clobber so the
   compiler does not keep a live condition in it across the asm.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("clrt;subc %5,%1; subc %4,%0" \
           : "=r" (sh), "=r" (sl) \
           : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
|
|
994
|
|
995 #endif /* __sh__ */
|
|
996
|
|
997 #if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
|
|
/* Unsigned 32x32 -> 64 multiply expressed in plain C.  Both arguments
   are parenthesized so that expression arguments such as `a + b' are
   converted and multiplied as a whole.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
|
|
/* Store in COUNT the number of leading zero bits of the 32-bit value X
   for SHmedia.  X is zero-extended into a 64-bit temporary, the nsb
   instruction is applied to it, and 31 is subtracted from its result.  */
#define count_leading_zeros(count, x) \
  do \
    { \
      UDItype x_ = (USItype)(x); \
      SItype c_; \
      \
      __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_)); \
      (count) = c_ - 31; \
    } \
  while (0)
/* Result of count_leading_zeros for a zero argument.  */
#define COUNT_LEADING_ZEROS_0 32
|
|
1010 #endif
|
|
1011
|
|
1012 #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
|
|
1013 && W_TYPE_SIZE == 32
|
|
/* Two-word addition (SH,SL) = (AH,AL) + (BH,BL) for 32-bit SPARC: addcc
   on the low words sets the carry, addx folds it into the high words.
   The condition codes are declared clobbered via __CLOBBER_CC.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%rJ" ((USItype) (ah)), \
             "rI" ((USItype) (bh)), \
             "%rJ" ((USItype) (al)), \
             "rI" ((USItype) (bl)) \
           __CLOBBER_CC)
|
|
/* Two-word subtraction (SH,SL) = (AH,AL) - (BH,BL) for 32-bit SPARC:
   subcc on the low words sets the borrow, subx folds it into the high
   words.  The condition codes are declared clobbered via
   __CLOBBER_CC.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "rJ" ((USItype) (ah)), \
             "rI" ((USItype) (bh)), \
             "rJ" ((USItype) (al)), \
             "rI" ((USItype) (bl)) \
           __CLOBBER_CC)
|
|
1032 #if defined (__sparc_v8__)
|
|
/* Unsigned multiply of U by V into (W1,W0) for SPARC v8: umul produces
   the low word in W0 and the high word is then read from %y.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("umul %2,%3,%1;rd %%y,%0" \
           : "=r" ((USItype) (w1)), \
             "=r" ((USItype) (w0)) \
           : "r" ((USItype) (u)), \
             "r" ((USItype) (v)))
|
|
/* Divide the two-word value (__N1,__N0) by __D for SPARC v8.  __N1 is
   moved into %y (followed by three nops), udiv produces the quotient in
   __Q, and the remainder __R is recomputed as __N0 - __Q * __D with umul
   and sub.  Both outputs are early-clobber because they are written
   before the last input is read.  */
#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
  __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
           : "=&r" ((USItype) (__q)), \
             "=&r" ((USItype) (__r)) \
           : "r" ((USItype) (__n1)), \
             "r" ((USItype) (__n0)), \
             "r" ((USItype) (__d)))
|
|
1046 #else
|
|
1047 #if defined (__sparclite__)
|
|
/* This has hardware multiply but not divide.  It also has two additional
   instructions scan (ffs from high bit) and divscc.  */
/* Unsigned multiply of U by V into (W1,W0): umul produces the low word
   in W0 and the high word is then read from %y.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("umul %2,%3,%1;rd %%y,%0" \
           : "=r" ((USItype) (w1)), \
             "=r" ((USItype) (w0)) \
           : "r" ((USItype) (u)), \
             "r" ((USItype) (v)))
|
|
/* Divide the two-word value (N1,N0) by D for sparclite, giving quotient
   Q and remainder R.  N1 is written to %y, then 32 divscc steps are
   issued (the first consumes N0, the last writes Q).  The remainder is
   read back from %y and, when the final condition codes say "less",
   corrected by adding D in the annulled delay slot of the branch.  %g1
   and the condition codes are declared clobbered.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("! Inlined udiv_qrnnd\n" \
           " wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
           " tst %%g0\n" \
           " divscc %3,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%0\n" \
           " rd %%y,%1\n" \
           " bl,a 1f\n" \
           " add %1,%4,%1\n" \
           "1: ! End of inline udiv_qrnnd" \
           : "=r" ((USItype) (q)), \
             "=r" ((USItype) (r)) \
           : "r" ((USItype) (n1)), \
             "r" ((USItype) (n0)), \
             "rI" ((USItype) (d)) \
           : "g1" __AND_CLOBBER_CC)
#define UDIV_TIME 37
|
|
/* Store in COUNT the number of leading zero bits of X using the
   sparclite scan instruction.  */
#define count_leading_zeros(count, x) \
  do { \
    __asm__ ("scan %1,1,%0" \
             : "=r" ((USItype) (count)) \
             : "r" ((USItype) (x))); \
  } while (0)
/* Early sparclites return 63 for an argument of 0, but they warn that future
   implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
   undefined.  */
|
|
1112 #else
|
|
/* SPARC without integer multiplication and divide instructions.
   (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
/* Unsigned multiply of U by V into (W1,W0) built from multiply-step
   instructions: U is written to %y, 32 mulscc steps against V follow,
   and a final mulscc with 0 completes the sequence.  The value prepared
   in %o5 (U masked by the sign of V) is added to the high word as the
   signed-to-unsigned correction; the low word is read from %y.  %g1,
   %o5 and the condition codes are declared clobbered.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("! Inlined umul_ppmm\n" \
           " wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n"\
           " sra %3,31,%%o5 ! Don't move this insn\n" \
           " and %2,%%o5,%%o5 ! Don't move this insn\n" \
           " andcc %%g0,0,%%g1 ! Don't move this insn\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,0,%%g1\n" \
           " add %%g1,%%o5,%0\n" \
           " rd %%y,%1" \
           : "=r" ((USItype) (w1)), \
             "=r" ((USItype) (w0)) \
           : "%rI" ((USItype) (u)), \
             "r" ((USItype) (v)) \
           : "g1", "o5" __AND_CLOBBER_CC)
#define UMUL_TIME 39 /* 39 instructions */
|
|
/* It's quite necessary to add this much assembler for the sparc.
   The default udiv_qrnnd (in C) is more than 10 times slower!  */
/* Divide the two-word value (__N1,__N0) by __D with a 32-iteration
   shift-and-subtract loop counted in %g1.  __N0 starts in the quotient
   register and __N1 in the remainder register (matching constraints "0"
   and "1"); quotient bits are shifted in inverted and fixed up by the
   final xnor.  %g1 and the condition codes are declared clobbered.  */
#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
  __asm__ ("! Inlined udiv_qrnnd\n" \
           " mov 32,%%g1\n" \
           " subcc %1,%2,%%g0\n" \
           "1: bcs 5f\n" \
           " addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
           " sub %1,%2,%1 ! this kills msb of n\n" \
           " addx %1,%1,%1 ! so this can't give carry\n" \
           " subcc %%g1,1,%%g1\n" \
           "2: bne 1b\n" \
           " subcc %1,%2,%%g0\n" \
           " bcs 3f\n" \
           " addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
           " b 3f\n" \
           " sub %1,%2,%1 ! this kills msb of n\n" \
           "4: sub %1,%2,%1\n" \
           "5: addxcc %1,%1,%1\n" \
           " bcc 2b\n" \
           " subcc %%g1,1,%%g1\n" \
           "! Got carry from n. Subtract next step to cancel this carry.\n" \
           " bne 4b\n" \
           " addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n" \
           " sub %1,%2,%1\n" \
           "3: xnor %0,0,%0\n" \
           " ! End of inline udiv_qrnnd" \
           : "=&r" ((USItype) (__q)), \
             "=&r" ((USItype) (__r)) \
           : "r" ((USItype) (__d)), \
             "1" ((USItype) (__n1)), \
             "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */
|
|
1195 #endif /* __sparclite__ */
|
|
1196 #endif /* __sparc_v8__ */
|
|
1197 #endif /* sparc32 */
|
|
1198
|
|
1199 #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
|
|
1200 && W_TYPE_SIZE == 64
|
|
/* Two-word addition (SH,SL) = (AH,AL) + (BH,BL) for 64-bit SPARC.  The
   low words are added with addcc and the high words with a plain add;
   the 64-bit carry (%xcc) is then applied by an annulled branch whose
   delay slot increments SH, so the increment executes only when the
   branch on carry-set is taken.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addcc %r4,%5,%1\n\t" \
           "add %r2,%3,%0\n\t" \
           "bcs,a,pn %%xcc, 1f\n\t" \
           "add %0, 1, %0\n" \
           "1:" \
           : "=r" ((UDItype)(sh)), \
             "=&r" ((UDItype)(sl)) \
           : "%rJ" ((UDItype)(ah)), \
             "rI" ((UDItype)(bh)), \
             "%rJ" ((UDItype)(al)), \
             "rI" ((UDItype)(bl)) \
           __CLOBBER_CC)
|
|
1214
|
|
/* Two-word subtraction (SH,SL) = (AH,AL) - (BH,BL) for 64-bit SPARC.
   The low words are subtracted with subcc and the high words with a
   plain sub; the 64-bit borrow (%xcc) is then applied by an annulled
   branch whose delay slot decrements SH, so the decrement executes only
   when the branch on carry-set is taken.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subcc %r4,%5,%1\n\t" \
           "sub %r2,%3,%0\n\t" \
           "bcs,a,pn %%xcc, 1f\n\t" \
           "sub %0, 1, %0\n\t" \
           "1:" \
           : "=r" ((UDItype)(sh)), \
             "=&r" ((UDItype)(sl)) \
           : "rJ" ((UDItype)(ah)), \
             "rI" ((UDItype)(bh)), \
             "rJ" ((UDItype)(al)), \
             "rI" ((UDItype)(bl)) \
           __CLOBBER_CC)
|
|
1228
|
|
/* Unsigned 64x64 -> 128 multiply of U by V into (WH,WL) for 64-bit
   SPARC.  The operands are split into 32-bit halves (srl / srlx), the
   partial products are formed with mulx, and the carries between them
   are tracked through %xcc.  Four scratch registers are provided through
   early-clobber outputs tmp1..tmp4.  */
#define umul_ppmm(wh, wl, u, v) \
  do { \
    UDItype tmp1, tmp2, tmp3, tmp4; \
    __asm__ __volatile__ ( \
      "srl %7,0,%3\n\t" \
      "mulx %3,%6,%1\n\t" \
      "srlx %6,32,%2\n\t" \
      "mulx %2,%3,%4\n\t" \
      "sllx %4,32,%5\n\t" \
      "srl %6,0,%3\n\t" \
      "sub %1,%5,%5\n\t" \
      "srlx %5,32,%5\n\t" \
      "addcc %4,%5,%4\n\t" \
      "srlx %7,32,%5\n\t" \
      "mulx %3,%5,%3\n\t" \
      "mulx %2,%5,%5\n\t" \
      "sethi %%hi(0x80000000),%2\n\t" \
      "addcc %4,%3,%4\n\t" \
      "srlx %4,32,%4\n\t" \
      "add %2,%2,%2\n\t" \
      "movcc %%xcc,%%g0,%2\n\t" \
      "addcc %5,%4,%5\n\t" \
      "sllx %3,32,%3\n\t" \
      "add %1,%3,%1\n\t" \
      "add %5,%2,%0" \
      : "=r" ((UDItype)(wh)), \
        "=&r" ((UDItype)(wl)), \
        "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
      : "r" ((UDItype)(u)), \
        "r" ((UDItype)(v)) \
      __CLOBBER_CC); \
  } while (0)
#define UMUL_TIME 96
#define UDIV_TIME 230
|
|
1263 #endif /* sparc64 */
|
|
1264
|
|
1265 #if defined (__vax__) && W_TYPE_SIZE == 32
|
|
/* Two-word addition (SH,SL) = (AH,AL) + (BH,BL) for VAX: addl2 adds the
   low words in place and adwc adds the high words with carry.  AH and AL
   are tied to the outputs through the "0" and "1" constraints.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \
           : "=g" ((USItype) (sh)), \
             "=&g" ((USItype) (sl)) \
           : "%0" ((USItype) (ah)), \
             "g" ((USItype) (bh)), \
             "%1" ((USItype) (al)), \
             "g" ((USItype) (bl)))
|
|
/* Two-word subtraction (SH,SL) = (AH,AL) - (BH,BL) for VAX: subl2
   subtracts the low words in place and sbwc subtracts the high words
   with borrow.  AH and AL are tied to the outputs through the "0" and
   "1" constraints.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \
           : "=g" ((USItype) (sh)), \
             "=&g" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "g" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "g" ((USItype) (bl)))
|
|
/* Unsigned multiply of M0 by M1 into (XH,XL) for VAX.  emul writes a
   double-word product into a union whose halves are then copied out;
   the trailing statement adds a correction to XH that depends on the
   sign bits of the operands (the same fix-up used when an unsigned
   product is derived from a signed one).  */
#define umul_ppmm(xh, xl, m0, m1) \
  do { \
    union { \
      UDItype __ll; \
      struct {USItype __l, __h;} __i; \
    } __xx; \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("emul %1,%2,$0,%0" \
             : "=r" (__xx.__ll) \
             : "g" (__m0), \
               "g" (__m1)); \
    (xh) = __xx.__i.__h; \
    (xl) = __xx.__i.__l; \
    (xh) += ((((SItype) __m0 >> 31) & __m1) \
             + (((SItype) __m1 >> 31) & __m0)); \
  } while (0)
|
|
/* Signed divide of the two-word value (N1,N0) by D for VAX.  The two
   halves are packed into a double-word union and handed to ediv, which
   delivers quotient Q and remainder R.  N1 and N0 are parenthesized so
   that expression arguments are assigned as a whole.  */
#define sdiv_qrnnd(q, r, n1, n0, d) \
  do { \
    union {DItype __ll; \
           struct {SItype __l, __h;} __i; \
          } __xx; \
    __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
    __asm__ ("ediv %3,%2,%0,%1" \
             : "=g" (q), "=g" (r) \
             : "g" (__xx.__ll), "g" (d)); \
  } while (0)
|
|
1308 #endif /* __vax__ */
|
|
1309
|
|
1310 #if defined (__xtensa__) && W_TYPE_SIZE == 32
|
|
/* This code is not Xtensa-configuration-specific, so rely on the compiler
   to expand builtin functions depending on what configuration features
   are available.  This avoids library calls when the operation can be
   performed in-line.  */
/* Unsigned multiply of U by V into (W1,W0): the double-word product
   from __builtin_umulsidi3 is split through a DWunion.  The output
   arguments are parenthesized so any lvalue expression is accepted.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    DWunion __w; \
    __w.ll = __builtin_umulsidi3 (u, v); \
    (w1) = __w.s.high; \
    (w0) = __w.s.low; \
  } while (0)
|
|
/* Thin wrappers around compiler builtins: the double-word product, and
   the leading / trailing zero-bit counts of X stored into COUNT.  */
#define __umulsidi3(u, v) __builtin_umulsidi3 (u, v)
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
|
|
1325 #endif /* __xtensa__ */
|
|
1326
|
|
1327 #if defined (__z8000__) && W_TYPE_SIZE == 16
|
|
/* Two-word addition (SH,SL) = (AH,AL) + (BH,BL) for Z8000 with 16-bit
   words: add on the low words, adc on the high words.  AH and AL are
   tied to the outputs through the "0" and "1" constraints.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
           : "=r" ((unsigned int)(sh)), \
             "=&r" ((unsigned int)(sl)) \
           : "%0" ((unsigned int)(ah)), \
             "r" ((unsigned int)(bh)), \
             "%1" ((unsigned int)(al)), \
             "rQR" ((unsigned int)(bl)))
|
|
/* Two-word subtraction (SH,SL) = (AH,AL) - (BH,BL) for Z8000 with
   16-bit words: sub on the low words, sbc on the high words.  AH and AL
   are tied to the outputs through the "0" and "1" constraints.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
           : "=r" ((unsigned int)(sh)), \
             "=&r" ((unsigned int)(sl)) \
           : "0" ((unsigned int)(ah)), \
             "r" ((unsigned int)(bh)), \
             "1" ((unsigned int)(al)), \
             "rQR" ((unsigned int)(bl)))
|
|
/* Unsigned multiply of M0 by M1 into (XH,XL) for Z8000 with 16-bit
   words.  mult leaves its product in the two halves of a union, which
   are copied out; the trailing statement adds a correction to XH that
   depends on the sign bits (bit 15) of the operands.  */
#define umul_ppmm(xh, xl, m0, m1) \
  do { \
    union {long int __ll; \
           struct {unsigned int __h, __l;} __i; \
          } __xx; \
    unsigned int __m0 = (m0), __m1 = (m1); \
    __asm__ ("mult %S0,%H3" \
             : "=r" (__xx.__i.__h), \
               "=r" (__xx.__i.__l) \
             : "%1" (__m0), \
               "rQR" (__m1)); \
    (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
    (xh) += ((((signed int) __m0 >> 15) & __m1) \
             + (((signed int) __m1 >> 15) & __m0)); \
  } while (0)
|
|
1359 #endif /* __z8000__ */
|
|
1360
|
|
1361 #endif /* __GNUC__ */
|
|
1362
|
|
/* If this machine has no inline assembler, use C macros.  */

#if !defined (add_ssaaaa)
/* Two-word addition (SH,SL) = (AH,AL) + (BH,BL).  The low words are
   added first into a temporary; a wrapped sum is smaller than AL, and
   that comparison supplies the carry into the high word.  SL is written
   last so it may name the same object as AL or BL.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __x; \
    __x = (al) + (bl); \
    (sh) = (ah) + (bh) + (__x < (al)); \
    (sl) = __x; \
  } while (0)
#endif
|
|
1374
|
|
#if !defined (sub_ddmmss)
/* Two-word subtraction (SH,SL) = (AH,AL) - (BH,BL).  The low words are
   subtracted first into a temporary; a wrapped difference is larger
   than AL, and that comparison supplies the borrow from the high word.
   SL is written last so it may name the same object as AL or BL.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __x; \
    __x = (al) - (bl); \
    (sh) = (ah) - (bh) - (__x > (al)); \
    (sl) = __x; \
  } while (0)
#endif
|
|
1384
|
|
1385 /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
|
|
1386 smul_ppmm. */
|
|
1387 #if !defined (umul_ppmm) && defined (smul_ppmm)
|
|
/* Unsigned multiply of U by V into (W1,W0), derived from the signed
   smul_ppmm.  The low word is identical; the high word is corrected by
   adding V when the top bit of U is set and U when the top bit of V is
   set (each mask is all-ones or zero).  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __w1; \
    UWtype __xm0 = (u), __xm1 = (v); \
    smul_ppmm (__w1, w0, __xm0, __xm1); \
    (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
                + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
  } while (0)
|
|
1396 #endif
|
|
1397
|
|
/* If we still don't have umul_ppmm, define it using plain C.  */
#if !defined (umul_ppmm)
/* Unsigned multiply of U by V into (W1,W0) by the schoolbook method:
   each operand is split into half-words, the four partial products are
   formed in full words, and the two middle products are accumulated
   with an explicit carry into the high product.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __x0, __x1, __x2, __x3; \
    UHWtype __ul, __vl, __uh, __vh; \
    \
    __ul = __ll_lowpart (u); \
    __uh = __ll_highpart (u); \
    __vl = __ll_lowpart (v); \
    __vh = __ll_highpart (v); \
    \
    __x0 = (UWtype) __ul * __vl; \
    __x1 = (UWtype) __ul * __vh; \
    __x2 = (UWtype) __uh * __vl; \
    __x3 = (UWtype) __uh * __vh; \
    \
    __x1 += __ll_highpart (__x0);/* this can't give carry */ \
    __x1 += __x2; /* but this indeed can */ \
    if (__x1 < __x2) /* did we get it? */ \
      __x3 += __ll_B; /* yes, add it in the proper pos. */ \
    \
    (w1) = __x3 + __ll_highpart (__x1); \
    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \
  } while (0)
#endif
|
|
1424
|
|
#if !defined (__umulsidi3)
/* Yield the double-word product of U and V as a value: umul_ppmm fills
   the two halves of a DWunion and the combined `ll' member is the result
   of the statement expression.  */
#define __umulsidi3(u, v) \
  ({DWunion __w; \
    umul_ppmm (__w.s.high, __w.s.low, u, v); \
    __w.ll; })
#endif
|
|
1431
|
|
/* Define this unconditionally, so it can be used for debugging.  */
/* Divide the two-word value (N1,N0) by D, giving quotient Q and
   remainder R, using only single-word operations.  The quotient is
   produced one half-word at a time: each step divides by the high half
   of D to get an estimate, then lowers the estimate at most twice while
   the partial remainder is smaller than estimate * low-half-of-D.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do { \
    UWtype __d1, __d0, __q1, __q0; \
    UWtype __r1, __r0, __m; \
    __d1 = __ll_highpart (d); \
    __d0 = __ll_lowpart (d); \
    \
    __r1 = (n1) % __d1; \
    __q1 = (n1) / __d1; \
    __m = (UWtype) __q1 * __d0; \
    __r1 = __r1 * __ll_B | __ll_highpart (n0); \
    if (__r1 < __m) \
      { \
        __q1--, __r1 += (d); \
        if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
          if (__r1 < __m) \
            __q1--, __r1 += (d); \
      } \
    __r1 -= __m; \
    \
    __r0 = __r1 % __d1; \
    __q0 = __r1 / __d1; \
    __m = (UWtype) __q0 * __d0; \
    __r0 = __r0 * __ll_B | __ll_lowpart (n0); \
    if (__r0 < __m) \
      { \
        __q0--, __r0 += (d); \
        if (__r0 >= (d)) \
          if (__r0 < __m) \
            __q0--, __r0 += (d); \
      } \
    __r0 -= __m; \
    \
    (q) = (UWtype) __q1 * __ll_B | __q0; \
    (r) = __r0; \
  } while (0)
|
|
1469
|
|
1470 /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
|
|
1471 __udiv_w_sdiv (defined in libgcc or elsewhere). */
|
|
1472 #if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
|
|
/* Unsigned divide of (NH,NL) by D built on __udiv_w_sdiv, which returns
   the quotient and stores the remainder through its first argument.
   The remainder temporary is a full UWtype word so nothing is truncated
   when the word size is wider than 32 bits.  */
#define udiv_qrnnd(q, r, nh, nl, d) \
  do { \
    UWtype __r; \
    (q) = __udiv_w_sdiv (&__r, nh, nl, d); \
    (r) = __r; \
  } while (0)
|
|
1479 #endif
|
|
1480
|
|
/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.
   That fallback is flagged as needing normalization.  */
#if !defined (udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif
|
|
1486
|
|
#if !defined (count_leading_zeros)
/* Store in COUNT the number of leading zero bits of X, in plain C.
   First find the bit offset __a of the highest non-zero byte -- by a
   two-level comparison when the word has at most 32 bits, otherwise by
   scanning down a byte at a time -- then look up that byte in
   __clz_tab.  */
#define count_leading_zeros(count, x) \
  do { \
    UWtype __xr = (x); \
    UWtype __a; \
    \
    if (W_TYPE_SIZE <= 32) \
      { \
        __a = __xr < ((UWtype)1<<2*__BITS4) \
          ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4) \
          : (__xr < ((UWtype)1<<3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \
      } \
    else \
      { \
        for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
          if (((__xr >> __a) & 0xff) != 0) \
            break; \
      } \
    \
    (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
  } while (0)
/* Result of count_leading_zeros for a zero argument.  */
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
|
|
1510
|
|
#if !defined (count_trailing_zeros)
/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
   defined in asm, but if it is not, the C version above is good enough.
   X & -X isolates the lowest set bit; its leading-zero count, taken from
   W_TYPE_SIZE - 1, is the index of that bit.  */
#define count_trailing_zeros(count, x) \
  do { \
    UWtype __ctz_x = (x); \
    UWtype __ctz_c; \
    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \
    (count) = W_TYPE_SIZE - 1 - __ctz_c; \
  } while (0)
#endif
|
|
1522
|
|
/* Default for targets whose udiv_qrnnd did not set the flag above.  */
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif
|