~ chicken-core (chicken-5) 285b06bccf7c43d17e6652c852c33fc55014ed8b
commit 285b06bccf7c43d17e6652c852c33fc55014ed8b Author: Peter Bex <peter@more-magic.net> AuthorDate: Mon Mar 9 23:31:12 2015 +0100 Commit: Peter Bex <peter@more-magic.net> CommitDate: Sun May 31 14:55:23 2015 +0200 Convert gcd to use scratch space, making it inlineable. This is in preparation of converting + and - to use scratch space, since those need gcd support when dealing with ratnums. Unfortunately, by making gcd inlineable, we can't (yet) make use of Burnikel-Ziegler division, which means that gcd on large bignums will not benefit from the fancy algorithms. We drop Lehmer's GCD implementation (at least for now), because transcribing it to C using the current API would result in many more piles of hairy code. Because the inlineable GCD does not receive a continuation, yet still needs to create an unbounded number of intermediate bignums, this requires a way to "re-use" a fixed number of buffers. We add a new "migrate" function which can move all components of an object that live in a particular buffer into another buffer. If the object does not live in that buffer (or is immediate), it will not be touched. This allows us to drag along the x and y objects in gcd with only one "lookbehind buffer" that *may* hold the previous iteration's intermediate object. On the first iteration, x and y will be provided by the caller and hence not live in the buffer, so these won't be touched. diff --git a/chicken.h b/chicken.h index d66cc772..89a34e9a 100644 --- a/chicken.h +++ b/chicken.h @@ -919,6 +919,8 @@ DECL_C_PROC_p0 (128, 1,0,0,0,0,0,0,0) #define C_aligned8(n) ((((C_word)(n)) & 7) == 0) +#define C_buf_end(b) ((C_word *)((C_byte *)(b) + sizeof(b))) + /* This is word-size dependent: */ #ifdef C_SIXTY_FOUR # define C_align(n) C_align8(n) @@ -1897,6 +1899,7 @@ C_fctexport C_word C_vector(C_word **ptr, int n, ...); C_fctexport C_word C_structure(C_word **ptr, int n, ...); C_fctexport C_word C_fcall C_mutate_slot(C_word *slot, C_word val) C_regparm; C_fctexport C_word C_fcall C_scratch_alloc(C_uword size) C_regparm; +C_fctexport C_word C_fcall C_migrate_buffer_object(C_word **ptr, C_word *start, C_word *end, C_word obj) C_regparm; C_fctexport void C_fcall C_reclaim(void *trampoline, void *proc) C_regparm C_noret; C_fctexport void C_save_and_reclaim(void *trampoline, void *proc, int n, ...) C_noret; C_fctexport void C_fcall C_rereclaim2(C_uword size, int double_plus) C_regparm; @@ -2031,6 +2034,8 @@ C_fctexport void C_ccall C_filter_heap_objects(C_word x, C_word closure, C_word C_word vector, C_word userarg) C_noret; C_fctexport time_t C_fcall C_seconds(C_long *ms) C_regparm; C_fctexport C_word C_fcall C_bignum_simplify(C_word big) C_regparm; +C_fctexport C_word C_fcall C_allocate_scratch_bignum(C_word **ptr, C_word size, C_word negp, C_word initp) C_regparm; +C_fctexport C_word C_fcall C_bignum_rewrap(C_word **p, C_word big) C_regparm; C_fctexport C_word C_a_i_list(C_word **a, int c, ...); C_fctexport C_word C_a_i_string(C_word **a, int c, ...); C_fctexport C_word C_a_i_record(C_word **a, int c, ...); @@ -2187,7 +2192,7 @@ C_fctexport C_word C_fcall C_i_file_exists_p(C_word name, C_word file, C_word di C_fctexport C_word C_fcall C_s_a_i_abs(C_word **ptr, C_word n, C_word x) C_regparm; C_fctexport C_word C_fcall C_s_a_i_negate(C_word **ptr, C_word n, C_word x) C_regparm; C_fctexport C_word C_fcall C_s_a_u_i_integer_negate(C_word **ptr, C_word n, C_word x) C_regparm; - +C_fctexport C_word C_fcall C_s_a_u_i_integer_gcd(C_word **ptr, C_word n, C_word x, C_word y) C_regparm; C_fctexport C_word C_fcall C_i_foreign_char_argumentp(C_word x) C_regparm; C_fctexport C_word C_fcall C_i_foreign_fixnum_argumentp(C_word x) C_regparm; diff --git a/library.scm b/library.scm index 0acbc0f0..e7ef8bd6 100644 --- a/library.scm +++ b/library.scm @@ -1894,58 +1894,9 @@ EOF (exact->inexact (##sys#integer-power a (inexact->exact b))) (##sys#integer-power a b)))) ) +;; OBSOLETE: Remove this (or change to define-inline) (define (##sys#integer-gcd a b) - (define k fixnum-precision) ; Can be anything between 2 and min(F, B). - (define k/2 (fx/ k 2)) ; F is fixnum precision and B bits in a big digit - - ;; This is Lehmer's GCD algorithm with Jebelean's quotient test, as - ;; it is presented in the paper "An Analysis of Lehmer’s Euclidean - ;; GCD Algorithm", by J. Sorenson. Fuck the ACM and their goddamn - ;; paywall; you can currently find the paper here: - ;; http://www.csie.nuk.edu.tw/~cychen/gcd/An%20analysis%20of%20Lehmer%27s%20Euclidean%20GCD%20algorithm.pdf - ;; If that URI fails, it's also explained in [MpNT, 5.2] - ;; - ;; The basic idea is to avoid divisions which yield only small - ;; quotients, in which the remainder won't reduce the numbers by - ;; much. This can be detected by dividing only the leading k bits. - (define (lehmer-gcd u v) - (let ((-h (fxneg (fx- (integer-length u) k)))) - (let lp ((i-even? #t) - (u^ (arithmetic-shift u -h)) - (v^ (arithmetic-shift v -h)) - (x-prev 1) (y-prev 0) - (x-curr 0) (y-curr 1)) - (let* ((q^ (fx/ u^ v^)) ; Estimated quotient for this step - (x-next (fx- x-prev (fx* q^ x-curr))) - (y-next (fx- y-prev (fx* q^ y-curr)))) - ;; Euclidian GCD swap on u^ and v^ - (let ((u^ v^) - (v^ (fx- u^ (fx* q^ v^)))) - (let ((done? (if i-even? - (or (fx< v^ (fxneg y-next)) - (fx< (fx- u^ v^) (fx- x-next x-curr))) - (or (fx< v^ (fxneg x-next)) - (fx< (fx- u^ v^) (fx- y-next y-curr)))))) - (if done? - (values (+ (* x-prev u) (* y-prev v)) - (+ (* x-curr u) (* y-curr v))) - (lp (not i-even?) u^ v^ x-curr y-curr x-next y-next)))))))) - - ;; This implements the basic Euclidian GCD algorithm, with a - ;; conditional call to Lehmer's GCD algorithm when the length - ;; difference between a and b is at most one halfdigit. - ;; The complexity of the whole thing is supposedly O(n^2/log n) - ;; where n is the number of bits in a and b. - (let* ((a (abs a)) (b (abs b)) ; Enforce loop invariant on input: - (swap? (##sys#<-2 a b))) ; both must be positive, and a >= b - (let lp ((a (if swap? b a)) - (b (if swap? a b))) - (cond ((eq? b 0) a) - ((fx< (fx- (integer-length a) (integer-length b)) k/2) - (receive (a b) (lehmer-gcd a b) - (if (eq? b 0) a (lp b (##sys#integer-remainder a b))))) - ((fixnum? a) (fxgcd a b)) ; b MUST be fixnum due to loop invariant - (else (lp b (##sys#integer-remainder a b))))))) + (##core#inline_allocate ("C_s_a_u_i_integer_gcd" 6) a b)) ;; Useful for sane error messages (define (##sys#internal-gcd loc a b) diff --git a/runtime.c b/runtime.c index 908ea064..63d66f92 100644 --- a/runtime.c +++ b/runtime.c @@ -217,6 +217,9 @@ extern void _C_do_apply_hack(void *proc, C_word *args, int count) C_noret; #define nmin(x, y) ((x) < (y) ? (x) : (y)) #define percentage(n, p) ((C_long)(((double)(n) * (double)p) / 100)) +#define clear_buffer_object(buf, obj) C_migrate_buffer_object(NULL, (C_word *)(buf), C_buf_end(buf), (obj)) +#define move_buffer_object(ptr, buf, obj) C_migrate_buffer_object(ptr, (C_word *)(buf), C_buf_end(buf), (obj)) + /* The bignum digit representation is fullword- little endian, so on * LE machines the halfdigits are numbered in the same order. On BE * machines, we must swap the odd and even positions. @@ -565,6 +568,7 @@ static C_regparm void bignum_digits_multiply(C_word x, C_word y, C_word result); static void bignum_divide_2_unsigned(C_word c, C_word self, C_word quotient); static void bignum_divide_2_unsigned_2(C_word c, C_word self, C_word remainder); static void bignum_destructive_divide_unsigned_small(C_word c, C_word self, C_word quotient); +static C_regparm void bignum_destructive_divide_full(C_word numerator, C_word denominator, C_word quotient, C_word remainder, C_word return_remainder); static C_regparm void bignum_destructive_divide_normalized(C_word big_u, C_word big_v, C_word big_q); static void make_structure_2(void *dummy) C_noret; static void generic_trampoline(void *dummy) C_noret; @@ -2901,6 +2905,14 @@ C_mutate_slot(C_word *slot, C_word val) * the live data. The reason we store the total length of the object * is because we may be mutating in-place the lengths of the stored * objects, and we need to know how much to skip over while scanning. + * + * If the allocating function returns, it *must* first mark all the + * values in scratch space as reclaimable. This is needed because + * there is no way to distinguish between a stale pointer into scratch + * space that's still somewhere on the stack in "uninitialized" memory + * versus a word that's been recycled by the next called function, + * which now holds a value that happens to have the same bit pattern + * but represents another thing entirely. */ C_regparm C_word C_fcall C_scratch_alloc(C_uword size) { @@ -3014,6 +3026,73 @@ C_regparm C_word C_fcall C_scratch_alloc(C_uword size) return result; } +/* Given a root object, scan its slots recursively (the objects + * themselves should be shallow and non-recursive), and migrate every + * object stored between the memory boundaries to the supplied + * pointer. Scratch data pointed to by objects between the memory + * boundaries is updated to point to the new memory region. If the + * supplied pointer is NULL, the scratch memory is marked reclaimable. + */ +C_regparm C_word C_fcall +C_migrate_buffer_object(C_word **ptr, C_word *start, C_word *end, C_word obj) +{ + C_word size, header, *data, *p = NULL, obj_in_buffer; + + if (C_immediatep(obj)) return obj; + + size = C_header_size(obj); + header = C_block_header(obj); + data = C_data_pointer(obj); + obj_in_buffer = (obj >= (C_word)start && obj < (C_word)end); + + /* Only copy object if we have a target pointer and it's in the buffer */ + if (ptr != NULL && obj_in_buffer) { + p = *ptr; + obj = (C_word)p; /* Return the object's new location at the end */ + } + + if (p != NULL) *p++ = header; + + if (header & C_BYTEBLOCK_BIT) { + if (p != NULL) { + *ptr = (C_word *)((C_byte *)(*ptr) + sizeof(C_header) + C_align(size)); + C_memcpy(p, data, size); + } + } else { + if (p != NULL) *ptr += size + 1; + + if(header & C_SPECIALBLOCK_BIT) { + if (p != NULL) *(p++) = *data; + size--; + data++; + } + + /* TODO: See if we can somehow make this use Cheney's algorithm */ + while(size--) { + C_word slot = *data; + + if(!C_immediatep(slot)) { + if (C_in_scratchspacep(slot)) { + if (obj_in_buffer) { /* Otherwise, don't touch scratch backpointer */ + /* TODO: Support recursing into objects in scratch space? */ + C_word *sp = (C_word *)slot; + + if (*(sp-1) == ALIGNMENT_HOLE_MARKER) --sp; + *(sp-1) = (C_word)p; /* This is why we traverse even if p = NULL */ + *data = C_SCHEME_UNBOUND; /* Ensure old reference is killed dead */ + } + } else { /* Slot is not a scratchspace object: check sub-objects */ + slot = C_migrate_buffer_object(ptr, start, end, slot); + } + } + if (p != NULL) *(p++) = slot; + else *data = slot; /* Sub-object may have moved! */ + data++; + } + } + return obj; /* Should be NULL if ptr was NULL */ +} + /* Register an object's slot as holding data to scratch space. Only * one slot can point to a scratch space object; the object in scratch * space is preceded by a pointer that points to this slot (or NULL). @@ -5763,7 +5842,7 @@ C_s_a_u_i_integer_negate(C_word **ptr, C_word n, C_word x) } else { C_word res, negp = C_mk_nbool(C_bignum_negativep(x)), size = C_fix(C_bignum_size(x)); - res = allocate_scratch_bignum(ptr, size, negp, C_SCHEME_FALSE); + res = C_allocate_scratch_bignum(ptr, size, negp, C_SCHEME_FALSE); bignum_digits_destructive_copy(res, x); return C_bignum_simplify(res); } @@ -9308,7 +9387,8 @@ static C_word allocate_tmp_bignum(C_word size, C_word negp, C_word initp) return C_a_i_record2(&mem, 2, C_bignum_type_tag, bigvec); } -static C_word allocate_scratch_bignum(C_word **ptr, C_word size, C_word negp, C_word initp) +C_regparm C_word C_fcall +C_allocate_scratch_bignum(C_word **ptr, C_word size, C_word negp, C_word initp) { C_word big, bigvec = C_scratch_alloc(C_SIZEOF_INTERNAL_BIGNUM_VECTOR(C_unfix(size))); @@ -9326,7 +9406,10 @@ static C_word allocate_scratch_bignum(C_word **ptr, C_word size, C_word negp, C_ } /* Simplification: scan trailing zeroes, then return a fixnum if the - * value fits, or trim the bignum's length. */ + * value fits, or trim the bignum's length. If the bignum was stored + * in scratch space, we mark it as reclaimable. This means any + * references to the original bignum are invalid after simplification! + */ C_regparm C_word C_fcall C_bignum_simplify(C_word big) { C_uword *start = C_bignum_digits(big), @@ -9340,13 +9423,18 @@ C_regparm C_word C_fcall C_bignum_simplify(C_word big) switch(length) { case 0: + if (C_in_scratchspacep(C_internal_bignum_vector(big))) + C_mutate_scratch_slot(NULL, C_internal_bignum_vector(big)); return C_fix(0); case 1: tmp = *start; if (C_bignum_negativep(big) ? !(tmp & C_INT_SIGN_BIT) && C_fitsinfixnump(-(C_word)tmp) : - C_ufitsinfixnump(tmp)) + C_ufitsinfixnump(tmp)) { + if (C_in_scratchspacep(C_internal_bignum_vector(big))) + C_mutate_scratch_slot(NULL, C_internal_bignum_vector(big)); return C_bignum_negativep(big) ? C_fix(-(C_word)tmp) : C_fix(tmp); + } /* FALLTHROUGH */ default: if (scan < last_digit) C_bignum_mutate_size(big, length); @@ -9502,48 +9590,10 @@ static void bignum_divide_2_unsigned_2(C_word c, C_word self, C_word remainder) return_remainder = C_block_item(self, 5), /* This one may be overwritten with the remainder */ /* remainder_negp = C_block_item(self, 6), */ - quotient = C_block_item(self, 7), - length = C_bignum_size(denominator); - C_uword d1 = *(C_bignum_digits(denominator) + length - 1), - *startr = C_bignum_digits(remainder), - *endr = startr + C_bignum_size(remainder); - int shift; + quotient = C_block_item(self, 7); - shift = C_BIGNUM_DIGIT_LENGTH - C_ilen(d1); /* nlz */ - - /* We have to work on halfdigits, so we shift out only the necessary - * amount in order fill out that halfdigit (base is halved). - * This trick is shamelessly stolen from Gauche :) - * See below for part 2 of the trick. - */ - if (shift >= C_BIGNUM_HALF_DIGIT_LENGTH) - shift -= C_BIGNUM_HALF_DIGIT_LENGTH; - - /* Code below won't always set high halfdigit of quotient, so do it here. */ - if (quotient != C_SCHEME_UNDEFINED) - C_bignum_digits(quotient)[C_bignum_size(quotient)-1] = 0; - - bignum_digits_destructive_copy(remainder, numerator); - *(endr-1) = 0; /* Ensure most significant digit is initialised */ - if (shift == 0) { /* Already normalized */ - bignum_destructive_divide_normalized(remainder, denominator, quotient); - } else { /* Requires normalisation; allocate scratch denominator for this */ - C_uword *startnd; - C_word ndenom; - - bignum_digits_destructive_shift_left(startr, endr, shift); - - ndenom = allocate_tmp_bignum(C_fix(length), C_SCHEME_FALSE, C_SCHEME_FALSE); - startnd = C_bignum_digits(ndenom); - bignum_digits_destructive_copy(ndenom, denominator); - bignum_digits_destructive_shift_left(startnd, startnd+length, shift); - - bignum_destructive_divide_normalized(remainder, ndenom, quotient); - if (C_truep(return_remainder)) /* Otherwise, don't bother shifting back */ - bignum_digits_destructive_shift_right(startr, endr, shift, 0); - - free_tmp_bignum(ndenom); - } + bignum_destructive_divide_full(numerator, denominator, + quotient, remainder, return_remainder); if (C_truep(return_remainder)) { if (C_truep(return_quotient)) { @@ -9594,6 +9644,52 @@ bignum_destructive_divide_unsigned_small(C_word c, C_word self, C_word quotient) } } +static C_regparm void +bignum_destructive_divide_full(C_word numerator, C_word denominator, C_word quotient, C_word remainder, C_word return_remainder) +{ + C_word length = C_bignum_size(denominator); + C_uword d1 = *(C_bignum_digits(denominator) + length - 1), + *startr = C_bignum_digits(remainder), + *endr = startr + C_bignum_size(remainder); + int shift; + + shift = C_BIGNUM_DIGIT_LENGTH - C_ilen(d1); /* nlz */ + + /* We have to work on halfdigits, so we shift out only the necessary + * amount in order fill out that halfdigit (base is halved). + * This trick is shamelessly stolen from Gauche :) + * See below for part 2 of the trick. + */ + if (shift >= C_BIGNUM_HALF_DIGIT_LENGTH) + shift -= C_BIGNUM_HALF_DIGIT_LENGTH; + + /* Code below won't always set high halfdigit of quotient, so do it here. */ + if (quotient != C_SCHEME_UNDEFINED) + C_bignum_digits(quotient)[C_bignum_size(quotient)-1] = 0; + + bignum_digits_destructive_copy(remainder, numerator); + *(endr-1) = 0; /* Ensure most significant digit is initialised */ + if (shift == 0) { /* Already normalized */ + bignum_destructive_divide_normalized(remainder, denominator, quotient); + } else { /* Requires normalisation; allocate scratch denominator for this */ + C_uword *startnd; + C_word ndenom; + + bignum_digits_destructive_shift_left(startr, endr, shift); + + ndenom = allocate_tmp_bignum(C_fix(length), C_SCHEME_FALSE, C_SCHEME_FALSE); + startnd = C_bignum_digits(ndenom); + bignum_digits_destructive_copy(ndenom, denominator); + bignum_digits_destructive_shift_left(startnd, startnd+length, shift); + + bignum_destructive_divide_normalized(remainder, ndenom, quotient); + if (C_truep(return_remainder)) /* Otherwise, don't bother shifting back */ + bignum_digits_destructive_shift_right(startr, endr, shift, 0); + + free_tmp_bignum(ndenom); + } +} + static C_regparm void bignum_destructive_divide_normalized(C_word big_u, C_word big_v, C_word big_q) { @@ -9747,6 +9843,92 @@ C_a_i_flonum_gcd(C_word **p, C_word n, C_word x, C_word y) return C_flonum(p, xub); } +/* Because this must be inlineable (due to + and - using this for + * ratnums), we can't use burnikel-ziegler division here, until we + * have a C implementation that doesn't consume stack. However, + * we *can* use Lehmer's GCD. + */ +C_regparm C_word C_fcall +C_s_a_u_i_integer_gcd(C_word **ptr, C_word n, C_word x, C_word y) +{ + C_word ab[2][C_SIZEOF_FIX_BIGNUM*4], *a, res, size, i = 0; + + /* Ensure loop invariant: abs(x) >= abs(y) */ + if (x & C_FIXNUM_BIT) { + if (y & C_FIXNUM_BIT) { + return C_i_fixnum_gcd(x, y); + } else { /* x is fixnum, y is bignum: swap */ + C_word tmp = y; + y = x; + x = tmp; + } + } else if (!(y & C_FIXNUM_BIT)) { /* Both are bignums: compare */ + switch (bignum_cmp_unsigned(x, y)) { + case -1: + { + C_word tmp = y; + y = x; + x = tmp; + break; + } + case 0: /* gcd(x, x) = abs(x); Try to reuse positive argument, if any */ + if (!C_bignum_negativep(x)) return x; + else return C_s_a_u_i_integer_abs(ptr, 1, y); + default: /* Do nothing: x > y */ + break; + } + } + + while(y != C_fix(0)) { + /* x and y are stored in the same buffer, as well as a result */ + a = ab[i++]; + if (i == 2) i = 0; + + if (x & C_FIXNUM_BIT) { + return C_i_fixnum_gcd(x, y); + } else if (y & C_FIXNUM_BIT) { + C_word absy = y & C_INT_SIGN_BIT ? -C_unfix(y) : C_unfix(y), + next_power = (C_uword)1 << (C_ilen(absy)-1); + + if (next_power == absy && C_fitsinfixnump(absy)) { + y = C_fix(*(C_bignum_digits(x)) & (next_power - 1)); + clear_buffer_object(ab[i], x); + x = C_fix(absy); + } else if (C_fitsinbignumhalfdigitp(absy)) { + y = C_fix(bignum_remainder_unsigned_halfdigit(x, absy)); + clear_buffer_object(ab[i], x); + x = C_fix(absy); + } else { + absy = C_a_u_i_fix_to_big(&a, y); + size = C_fix(C_bignum_size(x) + 1); + res = C_allocate_scratch_bignum(&a, size, C_SCHEME_FALSE, + C_SCHEME_FALSE); + bignum_destructive_divide_full(x, absy, C_SCHEME_UNDEFINED, res, + C_SCHEME_TRUE); + clear_buffer_object(ab[i], x); + x = y; + y = C_bignum_simplify(res); + } + } else { /* Both x and y are bignums */ + /* TODO: re-implement Lehmer's GCD algorithm in C? */ + size = C_fix(C_bignum_size(x) + 1); + res = C_allocate_scratch_bignum(&a, size, C_SCHEME_FALSE, C_SCHEME_FALSE); + bignum_destructive_divide_full(x, y, C_SCHEME_UNDEFINED, res, + C_SCHEME_TRUE); + y = move_buffer_object(&a, ab[i], y); + clear_buffer_object(ab[i], x); + x = y; + y = C_bignum_simplify(res); + } + } + + res = C_s_a_u_i_integer_abs(ptr, 1, x); + res = move_buffer_object(ptr, ab, res); + clear_buffer_object(ab, x); + clear_buffer_object(ab, y); + return res; +} + /* XXX TODO OBSOLETE: This can be removed after recompiling c-platform.scm */ void C_ccall C_quotient(C_word c, C_word closure, C_word k, C_word n1, C_word n2) diff --git a/types.db b/types.db index e12706f9..ca8ca2ca 100644 --- a/types.db +++ b/types.db @@ -486,7 +486,8 @@ (() '0) ((fixnum fixnum) (fixnum) (fxgcd #(1) #(2))) ((float float) (float) (fpgcd #(1) #(2))) - ((integer integer) (integer) (##sys#integer-gcd #(1) #(2))) + ((integer integer) (integer) + (##core#inline_allocate ("C_s_a_u_i_integer_gcd" 6) #(1) #(2))) ((* *) (##sys#gcd #(1) #(2)))) (##sys#gcd (#(procedure #:clean #:enforce #:foldable) ##sys#gcd (number number) number))Trap