/* Source: git.wh0rd.org - ICEs.git / 152043 / fp_mul_comba.c.0 */
1 #include <tfm.h>
2
#if defined(TFM_X86)

/*
 * Comba column-accumulator helpers, x86 flavour.
 *
 * All of these macros operate on three variables named c0, c1 and c2 that
 * must be in scope where the macro is used: c0 is the digit currently being
 * accumulated, c1 and c2 collect the carries out of it.
 *
 * None of the macros carries its own trailing semicolon; write
 * "COMBA_FORWARD;" at the use site.  That keeps them safe inside an
 * unbraced if/else.
 */

/* anything you need at the start (nothing on this target) */
#define COMBA_START

/* clear the chaining variables */
#define COMBA_CLEAR \
   do { c0 = c1 = c2 = 0; } while (0)

/* forward the carry to the next digit */
#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0)

/* store the first sum */
#define COMBA_STORE(x) \
   do { (x) = c0; } while (0)

/* store the second sum [carry] */
#define COMBA_STORE2(x) \
   do { (x) = c1; } while (0)

/* anything you need at the end (nothing on this target) */
#define COMBA_FINI

/*
 * Multiply i by j and add the double-width product into c2:c1:c0.
 *
 * i and j are bound with the "m" constraint, so both must be lvalues that
 * live in memory.  Each is evaluated exactly once.
 *
 *   movl  a -> eax
 *   mull  b          ; edx:eax = eax * b
 *   addl  eax -> c0  ; low half, sets carry
 *   adcl  edx -> c1  ; high half plus carry
 *   adcl  0   -> c2  ; carry out of c1
 *
 * c0/c1/c2 are read-write ("+r") operands.  eax and edx are scratch and the
 * arithmetic rewrites the flags, hence the clobber list.  __asm__ is used
 * instead of asm so the macro still compiles in strict ISO modes.
 */
#define MULADD(i, j)                                      \
   __asm__(                                               \
      "movl  %[a],%%eax    \n\t"                          \
      "mull  %[b]          \n\t"                          \
      "addl  %%eax,%[lo]   \n\t"                          \
      "adcl  %%edx,%[mid]  \n\t"                          \
      "adcl  $0,%[hi]      \n\t"                          \
      : [lo] "+r"(c0), [mid] "+r"(c1), [hi] "+r"(c2)      \
      : [a] "m"(i), [b] "m"(j)                            \
      : "%eax", "%edx", "cc")

#endif
37
38
39 /* generic PxQ multiplier */
40 void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
41 {
42 int ix, iy, iz, tx, ty, pa;
43 fp_digit c0, c1, c2, *tmpx, *tmpy;
44 fp_int tmp, *dst;
45
46 COMBA_START;
47 COMBA_CLEAR;
48
49 /* get size of output and trim */
50 pa = A->used + B->used;
51 if (pa >= FP_SIZE) {
52 pa = FP_SIZE-1;
53 }
54
55 if (A == C || B == C) {
56 fp_zero(&tmp);
57 dst = &tmp;
58 } else {
59 fp_zero(C);
60 dst = C;
61 }
62
63 for (ix = 0; ix < pa; ix++) {
64 /* get offsets into the two bignums */
65 ty = MIN(ix, B->used-1);
66 tx = ix - ty;
67
68 /* setup temp aliases */
69 tmpx = A->dp + tx;
70 tmpy = B->dp + ty;
71
72 /* this is the number of times the loop will iterrate, essentially its
73 while (tx++ < a->used && ty-- >= 0) { ... }
74 */
75 iy = MIN(A->used-tx, ty+1);
76
77 /* execute loop */
78 COMBA_FORWARD;
79 for (iz = 0; iz < iy; ++iz) {
80 MULADD(*tmpx++, *tmpy--);
81 }
82
83 /* store term */
84 COMBA_STORE(dst->dp[ix]);
85 }
86 /* store final carry */
87 COMBA_STORE2(dst->dp[ix]);
88 COMBA_FINI;
89
90 dst->used = pa;
91 fp_clamp(dst);
92 dst->sign = dst->used ? A->sign ^ B->sign : FP_ZPOS;
93 fp_copy(dst, C);
94 }