~ chicken-core (chicken-5) b0711c4ca7f8e92dd18807a1c553e6ce649f1380
commit b0711c4ca7f8e92dd18807a1c553e6ce649f1380 Author: Peter Bex <peter@more-magic.net> AuthorDate: Mon Jul 6 23:39:14 2015 +0200 Commit: Peter Bex <peter@more-magic.net> CommitDate: Mon Jul 6 23:39:14 2015 +0200 Port B/Z division perf stability fix from numbers. This changes Burnikel-Ziegler division's threshold check to look at the *difference* between the two arguments instead of the size of the smallest one. This idea is taken from MCA, algorithm 1.9. This seems to make a big difference on the "pidigits" benchmark from the Great Language Shootout game, which somehow triggers such pathological behaviour of the B/Z division routine that using plain gradebook division would be up to three times faster on that benchmark. Other benchmarks are mostly unaffected by this issue. Many thanks to "balkenbrij" on Reddit for finding this issue! diff --git a/chicken.h b/chicken.h index c0f437c2..ecd4203c 100644 --- a/chicken.h +++ b/chicken.h @@ -422,7 +422,7 @@ static inline int isinf_ld (long double x) /* This defines when to switch from schoolbook to Burnikel-Ziegler * division. It creates even more garbage than Karatsuba :( */ -# define C_BURNIKEL_ZIEGLER_THRESHOLD 300 +# define C_BURNIKEL_ZIEGLER_DIFF_THRESHOLD 300 #endif #ifndef C_RECURSIVE_TO_STRING_THRESHOLD /* This threshold is in terms of the expected string length. */ diff --git a/runtime.c b/runtime.c index 4696dd3d..d69e08a4 100644 --- a/runtime.c +++ b/runtime.c @@ -8683,10 +8683,8 @@ bignum_divrem(C_word **ptr, C_word x, C_word y, C_word *q, C_word *r) case 1: default: res = C_SCHEME_FALSE; - size = C_bignum_size(x); - if (size >= C_BURNIKEL_ZIEGLER_THRESHOLD && - /* This avoids endless recursion for odd Ns just above threshold */ - !(size & 1 && size < (C_BURNIKEL_ZIEGLER_THRESHOLD << 1))) { + size = C_bignum_size(x) - C_bignum_size(y); + if (size > C_BURNIKEL_ZIEGLER_DIFF_THRESHOLD) { res = bignum_divide_burnikel_ziegler(ptr, x, y, q, r); } @@ -8728,13 +8726,13 @@ bignum_divide_burnikel_ziegler(C_word **ptr, C_word x, C_word y, C_word *q, C_wo x = C_s_a_u_i_integer_abs(&a, 1, x); y = C_s_a_u_i_integer_abs(&a, 1, y); - /* Define m as min{2^k|(2^k)*BURNIKEL_ZIEGLER_THRESHOLD > s} + /* Define m as min{2^k|(2^k)*BURNIKEL_ZIEGLER_DIFF_THRESHOLD > s} * This ensures we shift as little as possible (less pressure * on the GC) while maintaining a power of two until we drop * below the threshold, so we can always split N in half. */ s = C_bignum_size(y); - m = 1 << C_ilen(s / C_BURNIKEL_ZIEGLER_THRESHOLD); + m = 1 << C_ilen(s / C_BURNIKEL_ZIEGLER_DIFF_THRESHOLD); j = (s+m-1) / m; /* j = s/m, rounded up */ n = j * m; @@ -8901,12 +8899,16 @@ burnikel_ziegler_2n_div_1n(C_word **ptr, C_word a, C_word b, C_word b1, C_word b { C_word kab[2][C_SIZEOF_FIX_BIGNUM*7], *ka, a12, a3, a4, q1 = C_fix(0), r1, q2 = C_fix(0), r2, *qp; - int stack_full = 0; + int stack_full = 0, size_a, size_b; C_stack_check1(stack_full = 1); + size_a = (a & C_FIXNUM_BIT) ? 1 : C_bignum_size(a); + size_b = (b & C_FIXNUM_BIT) ? 1 : C_bignum_size(b); + n = C_unfix(n); - if (stack_full || (n & 1) || (n < C_BURNIKEL_ZIEGLER_THRESHOLD)) { + if (stack_full || (n & 1) || + ((size_a - size_b) < C_BURNIKEL_ZIEGLER_DIFF_THRESHOLD)) { integer_divrem(ptr, a, b, q, r); } else { ka = kab[0];Trap