1 | #include <tommath.h> |
---|
2 | #ifdef BN_MP_KARATSUBA_MUL_C |
---|
3 | /* LibTomMath, multiple-precision integer library -- Tom St Denis |
---|
4 | * |
---|
5 | * LibTomMath is a library that provides multiple-precision |
---|
6 | * integer arithmetic as well as number theoretic functionality. |
---|
7 | * |
---|
8 | * The library was designed directly after the MPI library by |
---|
9 | * Michael Fromberger but has been written from scratch with |
---|
10 | * additional optimizations in place. |
---|
11 | * |
---|
12 | * The library is free for all purposes without any express |
---|
13 | * guarantee it works. |
---|
14 | * |
---|
15 | * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com |
---|
16 | */ |
---|
17 | |
---|
18 | /* c = |a| * |b| using Karatsuba Multiplication using |
---|
19 | * three half size multiplications |
---|
20 | * |
---|
21 | * Let B represent the radix [e.g. 2**DIGIT_BIT] and |
---|
22 | * let n represent half of the number of digits in |
---|
23 | * the min(a,b) |
---|
24 | * |
---|
25 | * a = a1 * B**n + a0 |
---|
26 | * b = b1 * B**n + b0 |
---|
27 | * |
---|
28 | * Then, a * b => |
---|
29 | a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 |
---|
30 | * |
---|
31 | * Note that a1b1 and a0b0 are used twice and only need to be |
---|
32 | * computed once. So in total three half size (half # of |
---|
33 | * digit) multiplications are performed, a0b0, a1b1 and |
---|
34 | * (a1+b1)(a0+b0) |
---|
35 | * |
---|
36 | * Note that a multiplication of half the digits requires |
---|
37 | * 1/4th the number of single precision multiplications so in |
---|
38 | * total after one call 25% of the single precision multiplications |
---|
39 | * are saved. Note also that the call to mp_mul can end up back |
---|
40 | * in this function if the a0, a1, b0, or b1 are above the threshold. |
---|
41 | * This is known as divide-and-conquer and leads to the famous |
---|
42 | * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than |
---|
43 | * the standard O(N**2) that the baseline/comba methods use. |
---|
44 | * Generally though the overhead of this method doesn't pay off |
---|
45 | * until a certain size (N ~ 80) is reached. |
---|
46 | */ |
---|
47 | int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) |
---|
48 | { |
---|
49 | mp_int x0, x1, y0, y1, t1, x0y0, x1y1; |
---|
50 | int B, err; |
---|
51 | |
---|
52 | /* default the return code to an error */ |
---|
53 | err = MP_MEM; |
---|
54 | |
---|
55 | /* min # of digits */ |
---|
56 | B = MIN (a->used, b->used); |
---|
57 | |
---|
58 | /* now divide in two */ |
---|
59 | B = B >> 1; |
---|
60 | |
---|
61 | /* init copy all the temps */ |
---|
62 | if (mp_init_size (&x0, B) != MP_OKAY) |
---|
63 | goto ERR; |
---|
64 | if (mp_init_size (&x1, a->used - B) != MP_OKAY) |
---|
65 | goto X0; |
---|
66 | if (mp_init_size (&y0, B) != MP_OKAY) |
---|
67 | goto X1; |
---|
68 | if (mp_init_size (&y1, b->used - B) != MP_OKAY) |
---|
69 | goto Y0; |
---|
70 | |
---|
71 | /* init temps */ |
---|
72 | if (mp_init_size (&t1, B * 2) != MP_OKAY) |
---|
73 | goto Y1; |
---|
74 | if (mp_init_size (&x0y0, B * 2) != MP_OKAY) |
---|
75 | goto T1; |
---|
76 | if (mp_init_size (&x1y1, B * 2) != MP_OKAY) |
---|
77 | goto X0Y0; |
---|
78 | |
---|
79 | /* now shift the digits */ |
---|
80 | x0.used = y0.used = B; |
---|
81 | x1.used = a->used - B; |
---|
82 | y1.used = b->used - B; |
---|
83 | |
---|
84 | { |
---|
85 | register int x; |
---|
86 | register mp_digit *tmpa, *tmpb, *tmpx, *tmpy; |
---|
87 | |
---|
88 | /* we copy the digits directly instead of using higher level functions |
---|
89 | * since we also need to shift the digits |
---|
90 | */ |
---|
91 | tmpa = a->dp; |
---|
92 | tmpb = b->dp; |
---|
93 | |
---|
94 | tmpx = x0.dp; |
---|
95 | tmpy = y0.dp; |
---|
96 | for (x = 0; x < B; x++) { |
---|
97 | *tmpx++ = *tmpa++; |
---|
98 | *tmpy++ = *tmpb++; |
---|
99 | } |
---|
100 | |
---|
101 | tmpx = x1.dp; |
---|
102 | for (x = B; x < a->used; x++) { |
---|
103 | *tmpx++ = *tmpa++; |
---|
104 | } |
---|
105 | |
---|
106 | tmpy = y1.dp; |
---|
107 | for (x = B; x < b->used; x++) { |
---|
108 | *tmpy++ = *tmpb++; |
---|
109 | } |
---|
110 | } |
---|
111 | |
---|
112 | /* only need to clamp the lower words since by definition the |
---|
113 | * upper words x1/y1 must have a known number of digits |
---|
114 | */ |
---|
115 | mp_clamp (&x0); |
---|
116 | mp_clamp (&y0); |
---|
117 | |
---|
118 | /* now calc the products x0y0 and x1y1 */ |
---|
119 | /* after this x0 is no longer required, free temp [x0==t2]! */ |
---|
120 | if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) |
---|
121 | goto X1Y1; /* x0y0 = x0*y0 */ |
---|
122 | if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) |
---|
123 | goto X1Y1; /* x1y1 = x1*y1 */ |
---|
124 | |
---|
125 | /* now calc x1+x0 and y1+y0 */ |
---|
126 | if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) |
---|
127 | goto X1Y1; /* t1 = x1 - x0 */ |
---|
128 | if (s_mp_add (&y1, &y0, &x0) != MP_OKAY) |
---|
129 | goto X1Y1; /* t2 = y1 - y0 */ |
---|
130 | if (mp_mul (&t1, &x0, &t1) != MP_OKAY) |
---|
131 | goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ |
---|
132 | |
---|
133 | /* add x0y0 */ |
---|
134 | if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) |
---|
135 | goto X1Y1; /* t2 = x0y0 + x1y1 */ |
---|
136 | if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY) |
---|
137 | goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ |
---|
138 | |
---|
139 | /* shift by B */ |
---|
140 | if (mp_lshd (&t1, B) != MP_OKAY) |
---|
141 | goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ |
---|
142 | if (mp_lshd (&x1y1, B * 2) != MP_OKAY) |
---|
143 | goto X1Y1; /* x1y1 = x1y1 << 2*B */ |
---|
144 | |
---|
145 | if (mp_add (&x0y0, &t1, &t1) != MP_OKAY) |
---|
146 | goto X1Y1; /* t1 = x0y0 + t1 */ |
---|
147 | if (mp_add (&t1, &x1y1, c) != MP_OKAY) |
---|
148 | goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */ |
---|
149 | |
---|
150 | /* Algorithm succeeded set the return code to MP_OKAY */ |
---|
151 | err = MP_OKAY; |
---|
152 | |
---|
153 | X1Y1:mp_clear (&x1y1); |
---|
154 | X0Y0:mp_clear (&x0y0); |
---|
155 | T1:mp_clear (&t1); |
---|
156 | Y1:mp_clear (&y1); |
---|
157 | Y0:mp_clear (&y0); |
---|
158 | X1:mp_clear (&x1); |
---|
159 | X0:mp_clear (&x0); |
---|
160 | ERR: |
---|
161 | return err; |
---|
162 | } |
---|
163 | #endif |
---|
164 | |
---|
165 | /* $Source: /cvsroot/tcl/libtommath/bn_mp_karatsuba_mul.c,v $ */ |
---|
166 | /* $Revision: 1.1.1.3 $ */ |
---|
167 | /* $Date: 2006/12/01 00:08:11 $ */ |
---|