~ chicken-core (chicken-5) 15d7c84f68a53b6ff968d3a226ecf65d40496eb4
commit 15d7c84f68a53b6ff968d3a226ecf65d40496eb4 Author: Peter Bex <peter@more-magic.net> AuthorDate: Sun Mar 29 19:55:04 2015 +0200 Commit: Peter Bex <peter@more-magic.net> CommitDate: Sun May 31 14:55:24 2015 +0200 Restore Lehmer's gcd, which *really* improves bignum performance in some cases. diff --git a/library.scm b/library.scm index cd827125..c23e7739 100644 --- a/library.scm +++ b/library.scm @@ -1111,6 +1111,9 @@ EOF ;;; Basic arithmetic: +(define-inline (%integer-gcd a b) + (##core#inline_allocate ("C_s_a_u_i_integer_gcd" 6) a b)) + (define (abs x) (##core#inline_allocate ("C_s_a_i_abs" 10) x)) (define (/ arg1 . args) @@ -1127,7 +1130,7 @@ EOF (when (eq? y 0) (##sys#error-hook (foreign-value "C_DIVISION_BY_ZERO_ERROR" int) '/ x y)) (cond ((and (exact-integer? x) (exact-integer? y)) - (let ((g (##sys#integer-gcd x y))) + (let ((g (%integer-gcd x y))) (ratnum (##sys#integer-quotient x g) (##sys#integer-quotient y g)))) ;; Compnum *must* be checked first ((or (cplxnum? x) (cplxnum? y)) @@ -1147,8 +1150,8 @@ EOF ;; With g1 = gcd(a, c) and g2 = gcd(b, d) [Knuth, 4.5.1 ex. 4] (let* ((a (%ratnum-numerator x)) (b (%ratnum-denominator x)) (c (%ratnum-numerator y)) (d (%ratnum-denominator y)) - (g1 (##sys#integer-gcd a c)) - (g2 (##sys#integer-gcd b d))) + (g1 (%integer-gcd a c)) + (g2 (%integer-gcd b d))) (ratnum (* (quotient a g1) (quotient d g2)) (* (quotient b g2) (quotient c g1)))) ;; a/b / c/d = a*d / b*c [with d = 1] @@ -1629,24 +1632,20 @@ EOF (exact->inexact (##sys#integer-power a (inexact->exact b))) (##sys#integer-power a b)))) ) -;; OBSOLETE: Remove this (or change to define-inline) -(define (##sys#integer-gcd a b) - (##core#inline_allocate ("C_s_a_u_i_integer_gcd" 6) a b)) - ;; Useful for sane error messages (define (##sys#internal-gcd loc a b) (cond ((exact-integer? a) - (cond ((exact-integer? b) (##sys#integer-gcd a b)) + (cond ((exact-integer? b) (%integer-gcd a b)) ((and (##core#inline "C_i_flonump" b) (##core#inline "C_u_i_fpintegerp" b)) - (exact->inexact (##sys#integer-gcd a (inexact->exact b)))) + (exact->inexact (%integer-gcd a (inexact->exact b)))) (else (##sys#error-bad-integer b loc)))) ((and (##core#inline "C_i_flonump" a) (##core#inline "C_u_i_fpintegerp" a)) (cond ((##core#inline "C_i_flonump" b) (##core#inline_allocate ("C_a_i_flonum_gcd" 4) a b)) ((exact-integer? b) - (exact->inexact (##sys#integer-gcd (inexact->exact a) b))) + (exact->inexact (%integer-gcd (inexact->exact a) b))) (else (##sys#error-bad-integer b loc)))) (else (##sys#error-bad-integer a loc)))) ;; For compat reasons, we define this diff --git a/runtime.c b/runtime.c index ede32de8..2074468f 100644 --- a/runtime.c +++ b/runtime.c @@ -10534,6 +10534,69 @@ C_a_i_flonum_gcd(C_word **p, C_word n, C_word x, C_word y) return C_flonum(p, xub); } +/* This is Lehmer's GCD algorithm with Jebelean's quotient test, as + * it is presented in the paper "An Analysis of Lehmer’s Euclidean + * GCD Algorithm", by J. Sorenson. Fuck the ACM and their goddamn + * paywall; you can currently find the paper here: + * http://www.csie.nuk.edu.tw/~cychen/gcd/An%20analysis%20of%20Lehmer%27s%20Euclidean%20GCD%20algorithm.pdf + * If that URI fails, it's also explained in [MpNT, 5.2] + * + * The basic idea is to avoid divisions which yield only small + * quotients, in which the remainder won't reduce the numbers by + * much. This can be detected by dividing only the leading k bits. + * In our case, k = C_WORD_SIZE - 2. + */ +C_inline void lehmer_gcd(C_word **ptr, C_word u, C_word v, C_word *x, C_word *y) +{ + int i_even = 1, done = 0; + C_word shift_amount = integer_length_abs(u) - (C_WORD_SIZE - 2), + uhat, vhat, qhat, ab[C_SIZEOF_FIX_BIGNUM*6], *a = ab, xnext, ynext, + xprev = 1, yprev = 0, xcurr = 0, ycurr = 1; + + uhat = C_s_a_i_arithmetic_shift(&a, 2, u, C_fix(-shift_amount)); + vhat = C_s_a_i_arithmetic_shift(&a, 2, v, C_fix(-shift_amount)); + assert(uhat & C_FIXNUM_BIT); uhat = C_unfix(uhat); + assert(vhat & C_FIXNUM_BIT); vhat = C_unfix(vhat); + + do { + qhat = uhat / vhat; /* Estimated quotient for this step */ + xnext = xprev - qhat * xcurr; + ynext = yprev - qhat * ycurr; + + /* Euclidean GCD swap on uhat and vhat (shift_amount is not needed): */ + shift_amount = vhat; + vhat = uhat - qhat * vhat; + uhat = shift_amount; + + i_even = !i_even; + if (i_even) + done = (vhat < -xnext) || ((uhat - vhat) < (ynext - ycurr)); + else + done = (vhat < -ynext) || ((uhat - vhat) < (xnext - xcurr)); + + if (!done) { + xprev = xcurr; yprev = ycurr; + xcurr = xnext; ycurr = ynext; + } + } while (!done); + + /* x = xprev * u + yprev * v */ + uhat = C_s_a_u_i_integer_times(&a, 2, C_fix(xprev), u); + vhat = C_s_a_u_i_integer_times(&a, 2, C_fix(yprev), v); + *x = C_s_a_u_i_integer_plus(ptr, 2, uhat, vhat); + *x = move_buffer_object(ptr, ab, *x); + clear_buffer_object(ab, uhat); + clear_buffer_object(ab, vhat); + + /* y = xcurr * u + ycurr * v */ + uhat = C_s_a_u_i_integer_times(&a, 2, C_fix(xcurr), u); + vhat = C_s_a_u_i_integer_times(&a, 2, C_fix(ycurr), v); + *y = C_s_a_u_i_integer_plus(ptr, 2, uhat, vhat); + *y = move_buffer_object(ptr, ab, *y); + clear_buffer_object(ab, uhat); + clear_buffer_object(ab, vhat); +} + /* Because this must be inlineable (due to + and - using this for * ratnums), we can't use burnikel-ziegler division here, until we * have a C implementation that doesn't consume stack. However, @@ -10569,8 +10632,12 @@ C_s_a_u_i_integer_gcd(C_word **ptr, C_word n, C_word x, C_word y) break; } } + a = ab[i++]; + x = C_s_a_u_i_integer_abs(&a, 1, x); + y = C_s_a_u_i_integer_abs(&a, 1, y); while(y != C_fix(0)) { + assert(integer_length_abs(x) >= integer_length_abs(y)); /* x and y are stored in the same buffer, as well as a result */ a = ab[i++]; if (i == 2) i = 0; @@ -10600,13 +10667,27 @@ C_s_a_u_i_integer_gcd(C_word **ptr, C_word n, C_word x, C_word y) x = y; y = C_bignum_simplify(res); } - } else { /* Both x and y are bignums */ - /* TODO: re-implement Lehmer's GCD algorithm in C? */ + } else { + /* First, see if we should run a Lehmer step */ + if ((integer_length_abs(x) - integer_length_abs(y)) < C_HALF_WORD_SIZE) { + C_word newx, newy; + + lehmer_gcd(&a, x, y, &newx, &newy); + clear_buffer_object(ab[i], x); + clear_buffer_object(ab[i], y); + x = newx; + y = newy; + if (x & C_FIXNUM_BIT) x = C_a_u_i_fix_to_big(&a, x); + if (y & C_FIXNUM_BIT) y = C_a_u_i_fix_to_big(&a, y); + a = ab[i++]; /* Ensure x and y get cleared correctly below */ + if (i == 2) i = 0; + } + size = C_fix(C_bignum_size(x) + 1); res = C_allocate_scratch_bignum(&a, size, C_SCHEME_FALSE, C_SCHEME_FALSE); bignum_destructive_divide_full(x, y, C_SCHEME_UNDEFINED, res, C_SCHEME_TRUE); - y = move_buffer_object(&a, ab[i], y); + y = move_buffer_object(&a, ab[i], C_bignum_simplify(y)); clear_buffer_object(ab[i], x); x = y; y = C_bignum_simplify(res);Trap