~ chicken-core (chicken-5) 15d7c84f68a53b6ff968d3a226ecf65d40496eb4
commit 15d7c84f68a53b6ff968d3a226ecf65d40496eb4
Author: Peter Bex <peter@more-magic.net>
AuthorDate: Sun Mar 29 19:55:04 2015 +0200
Commit: Peter Bex <peter@more-magic.net>
CommitDate: Sun May 31 14:55:24 2015 +0200
Restore Lehmer's gcd, which *really* improves bignum performance in some cases.
diff --git a/library.scm b/library.scm
index cd827125..c23e7739 100644
--- a/library.scm
+++ b/library.scm
@@ -1111,6 +1111,9 @@ EOF
;;; Basic arithmetic:
+(define-inline (%integer-gcd a b)
+ (##core#inline_allocate ("C_s_a_u_i_integer_gcd" 6) a b))
+
(define (abs x) (##core#inline_allocate ("C_s_a_i_abs" 10) x))
(define (/ arg1 . args)
@@ -1127,7 +1130,7 @@ EOF
(when (eq? y 0)
(##sys#error-hook (foreign-value "C_DIVISION_BY_ZERO_ERROR" int) '/ x y))
(cond ((and (exact-integer? x) (exact-integer? y))
- (let ((g (##sys#integer-gcd x y)))
+ (let ((g (%integer-gcd x y)))
(ratnum (##sys#integer-quotient x g) (##sys#integer-quotient y g))))
;; Compnum *must* be checked first
((or (cplxnum? x) (cplxnum? y))
@@ -1147,8 +1150,8 @@ EOF
;; With g1 = gcd(a, c) and g2 = gcd(b, d) [Knuth, 4.5.1 ex. 4]
(let* ((a (%ratnum-numerator x)) (b (%ratnum-denominator x))
(c (%ratnum-numerator y)) (d (%ratnum-denominator y))
- (g1 (##sys#integer-gcd a c))
- (g2 (##sys#integer-gcd b d)))
+ (g1 (%integer-gcd a c))
+ (g2 (%integer-gcd b d)))
(ratnum (* (quotient a g1) (quotient d g2))
(* (quotient b g2) (quotient c g1))))
;; a/b / c/d = a*d / b*c [with d = 1]
@@ -1629,24 +1632,20 @@ EOF
(exact->inexact (##sys#integer-power a (inexact->exact b)))
(##sys#integer-power a b)))) )
-;; OBSOLETE: Remove this (or change to define-inline)
-(define (##sys#integer-gcd a b)
- (##core#inline_allocate ("C_s_a_u_i_integer_gcd" 6) a b))
-
;; Useful for sane error messages
(define (##sys#internal-gcd loc a b)
(cond ((exact-integer? a)
- (cond ((exact-integer? b) (##sys#integer-gcd a b))
+ (cond ((exact-integer? b) (%integer-gcd a b))
((and (##core#inline "C_i_flonump" b)
(##core#inline "C_u_i_fpintegerp" b))
- (exact->inexact (##sys#integer-gcd a (inexact->exact b))))
+ (exact->inexact (%integer-gcd a (inexact->exact b))))
(else (##sys#error-bad-integer b loc))))
((and (##core#inline "C_i_flonump" a)
(##core#inline "C_u_i_fpintegerp" a))
(cond ((##core#inline "C_i_flonump" b)
(##core#inline_allocate ("C_a_i_flonum_gcd" 4) a b))
((exact-integer? b)
- (exact->inexact (##sys#integer-gcd (inexact->exact a) b)))
+ (exact->inexact (%integer-gcd (inexact->exact a) b)))
(else (##sys#error-bad-integer b loc))))
(else (##sys#error-bad-integer a loc))))
;; For compat reasons, we define this
diff --git a/runtime.c b/runtime.c
index ede32de8..2074468f 100644
--- a/runtime.c
+++ b/runtime.c
@@ -10534,6 +10534,69 @@ C_a_i_flonum_gcd(C_word **p, C_word n, C_word x, C_word y)
return C_flonum(p, xub);
}
+/* This is Lehmer's GCD algorithm with Jebelean's quotient test, as
+ * it is presented in the paper "An Analysis of Lehmer’s Euclidean
+ * GCD Algorithm", by J. Sorenson. Fuck the ACM and their goddamn
+ * paywall; you can currently find the paper here:
+ * http://www.csie.nuk.edu.tw/~cychen/gcd/An%20analysis%20of%20Lehmer%27s%20Euclidean%20GCD%20algorithm.pdf
+ * If that URI fails, it's also explained in [MpNT, 5.2]
+ *
+ * The basic idea is to avoid divisions which yield only small
+ * quotients, in which the remainder won't reduce the numbers by
+ * much. This can be detected by dividing only the leading k bits.
+ * In our case, k = C_WORD_SIZE - 2.
+ */
+C_inline void lehmer_gcd(C_word **ptr, C_word u, C_word v, C_word *x, C_word *y)
+{
+ int i_even = 1, done = 0;
+ C_word shift_amount = integer_length_abs(u) - (C_WORD_SIZE - 2),
+ uhat, vhat, qhat, ab[C_SIZEOF_FIX_BIGNUM*6], *a = ab, xnext, ynext,
+ xprev = 1, yprev = 0, xcurr = 0, ycurr = 1;
+
+ uhat = C_s_a_i_arithmetic_shift(&a, 2, u, C_fix(-shift_amount));
+ vhat = C_s_a_i_arithmetic_shift(&a, 2, v, C_fix(-shift_amount));
+ assert(uhat & C_FIXNUM_BIT); uhat = C_unfix(uhat);
+ assert(vhat & C_FIXNUM_BIT); vhat = C_unfix(vhat);
+
+ do {
+ qhat = uhat / vhat; /* Estimated quotient for this step */
+ xnext = xprev - qhat * xcurr;
+ ynext = yprev - qhat * ycurr;
+
+ /* Euclidean GCD swap on uhat and vhat (shift_amount is not needed): */
+ shift_amount = vhat;
+ vhat = uhat - qhat * vhat;
+ uhat = shift_amount;
+
+ i_even = !i_even;
+ if (i_even)
+ done = (vhat < -xnext) || ((uhat - vhat) < (ynext - ycurr));
+ else
+ done = (vhat < -ynext) || ((uhat - vhat) < (xnext - xcurr));
+
+ if (!done) {
+ xprev = xcurr; yprev = ycurr;
+ xcurr = xnext; ycurr = ynext;
+ }
+ } while (!done);
+
+ /* x = xprev * u + yprev * v */
+ uhat = C_s_a_u_i_integer_times(&a, 2, C_fix(xprev), u);
+ vhat = C_s_a_u_i_integer_times(&a, 2, C_fix(yprev), v);
+ *x = C_s_a_u_i_integer_plus(ptr, 2, uhat, vhat);
+ *x = move_buffer_object(ptr, ab, *x);
+ clear_buffer_object(ab, uhat);
+ clear_buffer_object(ab, vhat);
+
+ /* y = xcurr * u + ycurr * v */
+ uhat = C_s_a_u_i_integer_times(&a, 2, C_fix(xcurr), u);
+ vhat = C_s_a_u_i_integer_times(&a, 2, C_fix(ycurr), v);
+ *y = C_s_a_u_i_integer_plus(ptr, 2, uhat, vhat);
+ *y = move_buffer_object(ptr, ab, *y);
+ clear_buffer_object(ab, uhat);
+ clear_buffer_object(ab, vhat);
+}
+
/* Because this must be inlineable (due to + and - using this for
* ratnums), we can't use burnikel-ziegler division here, until we
* have a C implementation that doesn't consume stack. However,
@@ -10569,8 +10632,12 @@ C_s_a_u_i_integer_gcd(C_word **ptr, C_word n, C_word x, C_word y)
break;
}
}
+ a = ab[i++];
+ x = C_s_a_u_i_integer_abs(&a, 1, x);
+ y = C_s_a_u_i_integer_abs(&a, 1, y);
while(y != C_fix(0)) {
+ assert(integer_length_abs(x) >= integer_length_abs(y));
/* x and y are stored in the same buffer, as well as a result */
a = ab[i++];
if (i == 2) i = 0;
@@ -10600,13 +10667,27 @@ C_s_a_u_i_integer_gcd(C_word **ptr, C_word n, C_word x, C_word y)
x = y;
y = C_bignum_simplify(res);
}
- } else { /* Both x and y are bignums */
- /* TODO: re-implement Lehmer's GCD algorithm in C? */
+ } else {
+ /* First, see if we should run a Lehmer step */
+ if ((integer_length_abs(x) - integer_length_abs(y)) < C_HALF_WORD_SIZE) {
+ C_word newx, newy;
+
+ lehmer_gcd(&a, x, y, &newx, &newy);
+ clear_buffer_object(ab[i], x);
+ clear_buffer_object(ab[i], y);
+ x = newx;
+ y = newy;
+ if (x & C_FIXNUM_BIT) x = C_a_u_i_fix_to_big(&a, x);
+ if (y & C_FIXNUM_BIT) y = C_a_u_i_fix_to_big(&a, y);
+ a = ab[i++]; /* Ensure x and y get cleared correctly below */
+ if (i == 2) i = 0;
+ }
+
size = C_fix(C_bignum_size(x) + 1);
res = C_allocate_scratch_bignum(&a, size, C_SCHEME_FALSE, C_SCHEME_FALSE);
bignum_destructive_divide_full(x, y, C_SCHEME_UNDEFINED, res,
C_SCHEME_TRUE);
- y = move_buffer_object(&a, ab[i], y);
+ y = move_buffer_object(&a, ab[i], C_bignum_simplify(y));
clear_buffer_object(ab[i], x);
x = y;
y = C_bignum_simplify(res);
Trap