Mercurial > hg > Members > anatofuz > MoarVM
comparison 3rdparty/libtommath/bn_mp_toom_mul.c @ 0:2cf249471370
convert mercurial for git
author | Takahiro SHIMIZU <anatofuz@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 08 May 2018 16:09:12 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2cf249471370 |
---|---|
1 #include <tommath_private.h> | |
2 #ifdef BN_MP_TOOM_MUL_C | |
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis | |
4 * | |
5 * LibTomMath is a library that provides multiple-precision | |
6 * integer arithmetic as well as number theoretic functionality. | |
7 * | |
8 * The library was designed directly after the MPI library by | |
9 * Michael Fromberger but has been written from scratch with | |
10 * additional optimizations in place. | |
11 * | |
12 * The library is free for all purposes without any express | |
13 * guarantee it works. | |
14 * | |
15 * Tom St Denis, tstdenis82@gmail.com, http://libtom.org | |
16 */ | |
17 | |
18 /* multiplication using the Toom-Cook 3-way algorithm | |
19 * | |
20 * Much more complicated than Karatsuba but has a lower | |
21 * asymptotic running time of O(N**1.464). This algorithm is | |
22 * only particularly useful on VERY large inputs | |
23 * (we're talking 1000s of digits here...). | |
24 */ | |
25 int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) | |
26 { | |
27 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; | |
28 int res, B; | |
29 | |
30 /* init temps */ | |
31 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, | |
32 &a0, &a1, &a2, &b0, &b1, | |
33 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) { | |
34 return res; | |
35 } | |
36 | |
37 /* B */ | |
38 B = MIN(a->used, b->used) / 3; | |
39 | |
40 /* a = a2 * B**2 + a1 * B + a0 */ | |
41 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { | |
42 goto ERR_; | |
43 } | |
44 | |
45 if ((res = mp_copy(a, &a1)) != MP_OKAY) { | |
46 goto ERR_; | |
47 } | |
48 mp_rshd(&a1, B); | |
49 if ((res = mp_mod_2d(&a1, DIGIT_BIT * B, &a1)) != MP_OKAY) { | |
50 goto ERR_; | |
51 } | |
52 | |
53 if ((res = mp_copy(a, &a2)) != MP_OKAY) { | |
54 goto ERR_; | |
55 } | |
56 mp_rshd(&a2, B*2); | |
57 | |
58 /* b = b2 * B**2 + b1 * B + b0 */ | |
59 if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { | |
60 goto ERR_; | |
61 } | |
62 | |
63 if ((res = mp_copy(b, &b1)) != MP_OKAY) { | |
64 goto ERR_; | |
65 } | |
66 mp_rshd(&b1, B); | |
67 (void)mp_mod_2d(&b1, DIGIT_BIT * B, &b1); | |
68 | |
69 if ((res = mp_copy(b, &b2)) != MP_OKAY) { | |
70 goto ERR_; | |
71 } | |
72 mp_rshd(&b2, B*2); | |
73 | |
74 /* w0 = a0*b0 */ | |
75 if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) { | |
76 goto ERR_; | |
77 } | |
78 | |
79 /* w4 = a2 * b2 */ | |
80 if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) { | |
81 goto ERR_; | |
82 } | |
83 | |
84 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ | |
85 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { | |
86 goto ERR_; | |
87 } | |
88 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { | |
89 goto ERR_; | |
90 } | |
91 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { | |
92 goto ERR_; | |
93 } | |
94 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { | |
95 goto ERR_; | |
96 } | |
97 | |
98 if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) { | |
99 goto ERR_; | |
100 } | |
101 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { | |
102 goto ERR_; | |
103 } | |
104 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { | |
105 goto ERR_; | |
106 } | |
107 if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) { | |
108 goto ERR_; | |
109 } | |
110 | |
111 if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) { | |
112 goto ERR_; | |
113 } | |
114 | |
115 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ | |
116 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { | |
117 goto ERR_; | |
118 } | |
119 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { | |
120 goto ERR_; | |
121 } | |
122 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { | |
123 goto ERR_; | |
124 } | |
125 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { | |
126 goto ERR_; | |
127 } | |
128 | |
129 if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) { | |
130 goto ERR_; | |
131 } | |
132 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { | |
133 goto ERR_; | |
134 } | |
135 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { | |
136 goto ERR_; | |
137 } | |
138 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { | |
139 goto ERR_; | |
140 } | |
141 | |
142 if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) { | |
143 goto ERR_; | |
144 } | |
145 | |
146 | |
147 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ | |
148 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { | |
149 goto ERR_; | |
150 } | |
151 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { | |
152 goto ERR_; | |
153 } | |
154 if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) { | |
155 goto ERR_; | |
156 } | |
157 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { | |
158 goto ERR_; | |
159 } | |
160 if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) { | |
161 goto ERR_; | |
162 } | |
163 | |
164 /* now solve the matrix | |
165 | |
166 0 0 0 0 1 | |
167 1 2 4 8 16 | |
168 1 1 1 1 1 | |
169 16 8 4 2 1 | |
170 1 0 0 0 0 | |
171 | |
172 using 12 subtractions, 4 shifts, | |
173 2 small divisions and 1 small multiplication | |
174 */ | |
175 | |
176 /* r1 - r4 */ | |
177 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { | |
178 goto ERR_; | |
179 } | |
180 /* r3 - r0 */ | |
181 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { | |
182 goto ERR_; | |
183 } | |
184 /* r1/2 */ | |
185 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { | |
186 goto ERR_; | |
187 } | |
188 /* r3/2 */ | |
189 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { | |
190 goto ERR_; | |
191 } | |
192 /* r2 - r0 - r4 */ | |
193 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { | |
194 goto ERR_; | |
195 } | |
196 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { | |
197 goto ERR_; | |
198 } | |
199 /* r1 - r2 */ | |
200 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { | |
201 goto ERR_; | |
202 } | |
203 /* r3 - r2 */ | |
204 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { | |
205 goto ERR_; | |
206 } | |
207 /* r1 - 8r0 */ | |
208 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { | |
209 goto ERR_; | |
210 } | |
211 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { | |
212 goto ERR_; | |
213 } | |
214 /* r3 - 8r4 */ | |
215 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { | |
216 goto ERR_; | |
217 } | |
218 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { | |
219 goto ERR_; | |
220 } | |
221 /* 3r2 - r1 - r3 */ | |
222 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) { | |
223 goto ERR_; | |
224 } | |
225 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { | |
226 goto ERR_; | |
227 } | |
228 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { | |
229 goto ERR_; | |
230 } | |
231 /* r1 - r2 */ | |
232 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { | |
233 goto ERR_; | |
234 } | |
235 /* r3 - r2 */ | |
236 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { | |
237 goto ERR_; | |
238 } | |
239 /* r1/3 */ | |
240 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { | |
241 goto ERR_; | |
242 } | |
243 /* r3/3 */ | |
244 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { | |
245 goto ERR_; | |
246 } | |
247 | |
248 /* at this point shift W[n] by B*n */ | |
249 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { | |
250 goto ERR_; | |
251 } | |
252 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { | |
253 goto ERR_; | |
254 } | |
255 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { | |
256 goto ERR_; | |
257 } | |
258 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { | |
259 goto ERR_; | |
260 } | |
261 | |
262 if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) { | |
263 goto ERR_; | |
264 } | |
265 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { | |
266 goto ERR_; | |
267 } | |
268 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { | |
269 goto ERR_; | |
270 } | |
271 if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) { | |
272 goto ERR_; | |
273 } | |
274 | |
275 ERR_: | |
276 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, | |
277 &a0, &a1, &a2, &b0, &b1, | |
278 &b2, &tmp1, &tmp2, NULL); | |
279 return res; | |
280 } | |
281 | |
282 #endif | |
283 | |
284 /* $Source$ */ | |
285 /* $Revision$ */ | |
286 /* $Date$ */ |