~ chicken-core (chicken-5) cb9c9171df6927c1eaf08dfcaeb70c395914b778
commit cb9c9171df6927c1eaf08dfcaeb70c395914b778 Author: Peter Bex <peter@more-magic.net> AuthorDate: Fri Jul 7 11:45:51 2023 +0200 Commit: felix <felix@call-with-current-continuation.org> CommitDate: Tue Jul 11 12:12:33 2023 +0200 Don't retain weak references to finalizable objects When an object would be garbage, but has a finalizer, it will be artificially kept alive at the end of a GC cycle so that the finalizer gets a chance to run, with the original event as an argument. However, this object is effectively supposed to be garbage, and the finalizer may put the object in a defunct state. This means we don't want code to be able to extract such an object through a weak reference, because it should be considered garbage and *might* be already cleaned up. This could be a potential footgun, making use after free bugs more likely. To fix this, we make these "garbage" objects appear as garbage for the purpose of clearing weak pointers. The implementation is relatively trivial - we simply remember the heap top pointer before we revive garbage objects that have finalizers, and when dereferencing the pointer to the new object's location, where we check that the object is in the target heap, we also check that it's not in the portion of the heap where we've put the finalizable objects and anything they reference. Signed-off-by: felix <felix@call-with-current-continuation.org> diff --git a/manual/Module (chicken gc) b/manual/Module (chicken gc) index 48653e3a..25a29373 100644 --- a/manual/Module (chicken gc) +++ b/manual/Module (chicken gc) @@ -53,6 +53,12 @@ Multiple finalizers can be registered for the same object. The order in which the finalizers run is undefined. Execution of finalizers may be nested. +NOTE: When a finalizable object has any weak references (i.e., weak +locatives or weak pairs) to objects that are only reachable through it +or other finalizable objects, those references will be broken like +when the objects had already been collected. This is done in order to +avoid user code from accessing objects that are possibly in an +invalid state. === force-finalizers diff --git a/runtime.c b/runtime.c index fbce94fd..6fdd7fca 100644 --- a/runtime.c +++ b/runtime.c @@ -543,8 +543,8 @@ static void C_fcall mark_nested_objects(C_byte *heap_scan_top, C_byte *tgt_space static void C_fcall mark_live_objects(C_byte *tgt_space_start, C_byte **tgt_space_top, C_byte *tgt_space_limit) C_regparm; static void C_fcall mark_live_heap_only_objects(C_byte *tgt_space_start, C_byte **tgt_space_top, C_byte *tgt_space_limit) C_regparm; static C_word C_fcall intern0(C_char *name) C_regparm; -static void C_fcall update_weak_pairs(int mode) C_regparm; -static void C_fcall update_locatives(int mode) C_regparm; +static void C_fcall update_weak_pairs(int mode, C_byte *undead_start, C_byte *undead_end) C_regparm; +static void C_fcall update_locatives(int mode, C_byte *undead_start, C_byte *undead_end) C_regparm; static LF_LIST *find_module_handle(C_char *name); static void set_profile_timer(C_uword freq); static void take_profile_sample(); @@ -3497,8 +3497,8 @@ C_regparm void C_fcall C_reclaim(void *trampoline, C_word c) count = (C_uword)C_fromspace_top - (C_uword)start; ++gc_count_1; ++gc_count_1_total; - update_locatives(GC_MINOR); - update_weak_pairs(GC_MINOR); + update_locatives(GC_MINOR, start, *tgt_space_top); + update_weak_pairs(GC_MINOR, start, *tgt_space_top); } else { /* Mark finalizer list and remember pointers to non-forwarded items: */ @@ -3585,8 +3585,8 @@ C_regparm void C_fcall C_reclaim(void *trampoline, C_word c) } } - update_locatives(gc_mode); - update_weak_pairs(gc_mode); + update_locatives(gc_mode, start, *tgt_space_top); + update_weak_pairs(gc_mode, start, *tgt_space_top); count = (C_uword)tospace_top - (C_uword)tospace_start; // Actual used, < heap_size/2 @@ -4007,8 +4007,8 @@ C_regparm void C_fcall C_rereclaim2(C_uword size, int relative_resize) /* Mark nested values in already moved (marked) blocks in breadth-first manner: */ mark_nested_objects(start, new_tospace_start, &new_tospace_top, new_tospace_limit); - update_locatives(GC_REALLOC); - update_weak_pairs(GC_REALLOC); + update_locatives(GC_REALLOC, new_tospace_top, new_tospace_top); + update_weak_pairs(GC_REALLOC, new_tospace_top, new_tospace_top); heap_free (heapspace1, heapspace1_size); heap_free (heapspace2, heapspace2_size); @@ -4053,10 +4053,11 @@ C_regparm void C_fcall C_rereclaim2(C_uword size, int relative_resize) * can *only* contain weak-pairs-turned-forwarding-pointer, we may * freely access the first slot of such forwarding pointers. */ -static C_regparm void C_fcall update_weak_pairs(int mode) +static C_regparm void C_fcall update_weak_pairs(int mode, C_byte *undead_start, C_byte *undead_end) { int weakn = 0; C_word p, pair, car, h; + C_byte *car_ptr; /* NOTE: Don't use C_block_item() because it asserts the block is * big enough in DEBUGBUILD, but forwarding pointers have size 0. @@ -4087,10 +4088,12 @@ static C_regparm void C_fcall update_weak_pairs(int mode) h = C_block_header(car); } - /* If the car is unreferenced by anyone else, it wasn't moved by GC. So drop it: */ + car_ptr = (C_byte *)(C_uword)car; + /* If the car is unreferenced by anyone else, it wasn't moved by GC. Or, if it's in the "undead" portion of + the new heap, it was moved because it was only referenced by a revived finalizable object. In either case, drop it: */ if((mode == GC_MINOR && C_in_stackp(car)) || - (mode == GC_MAJOR && (C_in_stackp(car) || C_in_fromspacep(car))) || - (mode == GC_REALLOC && (C_in_stackp(car) || C_in_heapp(car)))) { /* NB: *old* heap! */ + (mode == GC_MAJOR && (C_in_stackp(car) || C_in_fromspacep(car) || (car_ptr >= undead_start && car_ptr < undead_end))) || + (mode == GC_REALLOC && (C_in_stackp(car) || C_in_heapp(car) || (car_ptr >= undead_start && car_ptr < undead_end)))) { /* NB: *old* heap! */ C_set_block_item(pair, 0, C_SCHEME_BROKEN_WEAK_PTR); ++weakn; @@ -4109,10 +4112,11 @@ static C_regparm void C_fcall update_weak_pairs(int mode) * so the updating of that pointer is not handled by the GC proper * (which only deals with full objects). */ -static C_regparm void C_fcall update_locatives(int mode) +static C_regparm void C_fcall update_locatives(int mode, C_byte *undead_start, C_byte *undead_end) { int weakn = 0; C_word p, loc, ptr, obj, h, offset; + C_byte *obj_ptr; for (p = locative_chain; p != (C_word)NULL; p = *((C_word *)C_data_pointer(p))) { h = C_block_header(p); @@ -4136,10 +4140,12 @@ static C_regparm void C_fcall update_locatives(int mode) h = C_block_header(obj); } - /* If the object is unreferenced by anyone else, it wasn't moved by GC. So drop it: */ + obj_ptr = (C_byte *)(C_uword)obj; + /* If the object is unreferenced by anyone else, it wasn't moved by GC. Or, if it's in the "undead" portion of + the new heap, it was moved because it was only referenced by a revived finalizable object. In either case, drop it: */ if((mode == GC_MINOR && C_in_stackp(obj)) || - (mode == GC_MAJOR && (C_in_stackp(obj) || C_in_fromspacep(obj))) || - (mode == GC_REALLOC && (C_in_stackp(obj) || C_in_heapp(obj)))) { /* NB: *old* heap! */ + (mode == GC_MAJOR && (C_in_stackp(obj) || C_in_fromspacep(obj) || (obj_ptr >= undead_start && obj_ptr < undead_end))) || + (mode == GC_REALLOC && (C_in_stackp(obj) || C_in_heapp(obj) || (obj_ptr >= undead_start && obj_ptr < undead_end)))) { /* NB: *old* heap! */ /* NOTE: This does *not* use BROKEN_WEAK_POINTER. This slot * holds an unaligned raw C pointer, not a Scheme object */ diff --git a/tests/weak-pointer-test.scm b/tests/weak-pointer-test.scm index d13ccfe2..9cb78312 100644 --- a/tests/weak-pointer-test.scm +++ b/tests/weak-pointer-test.scm @@ -204,4 +204,117 @@ (test-error "Fifth locative is reclaimed" (locative-ref loc5)) (test-equal "Sixth locative is NOT reclaimed" (locative-ref loc6) (vector-ref nested-held-onto-value 1)))) + +(test-group "Testing that weak pairs get invalidated before finalizing would-be garbage" + (gc #t) ; Improve chances we don't get a minor GC in between + (let* ((not-held-onto-value (vector 42)) + (held-onto-value (vector 99)) + (garbage-a (vector (weak-cons not-held-onto-value '()) (weak-cons held-onto-value '()) #f)) + (garbage-b (vector (weak-cons not-held-onto-value '()) (weak-cons held-onto-value '()) #f)) + + (garbage-a-weak-ref (weak-cons garbage-a '())) + (garbage-b-weak-ref (weak-cons garbage-b '())) + + (observed-garbage-a-0 #f) + (observed-garbage-a-1 #f) + (observed-garbage-a-2 #f) + (observed-garbage-b-0 #f) + (observed-garbage-b-1 #f) + (observed-garbage-b-2 #f)) + + ;; Garbage weakly references eachother + (vector-set! garbage-a 2 (weak-cons garbage-b '())) + (vector-set! garbage-b 2 (weak-cons garbage-a '())) + + (set-finalizer! garbage-a (lambda (vec) + (set! observed-garbage-a-0 (car (vector-ref vec 0))) + (set! observed-garbage-a-1 (car (vector-ref vec 1))) + (set! observed-garbage-a-2 (car (vector-ref vec 2))))) + (set-finalizer! garbage-b (lambda (vec) + (set! observed-garbage-b-0 (car (vector-ref vec 0))) + (set! observed-garbage-b-1 (car (vector-ref vec 1))) + (set! observed-garbage-b-2 (car (vector-ref vec 2))))) + + (set! not-held-onto-value #f) + (set! garbage-a #f) + (set! garbage-b #f) + + ;; Must be a major collection, finalizers don't get queued on minor GC + ;; (gc #t) + ;; NOTE: The above won't work because it triggers *another* GC after running finalizers, + ;; which would invalidate all weak pairs anyway. So instead, we create garbage until + ;; the finalizers have run. This is more like what happens in a regular program. + (let lp () + (unless (and observed-garbage-a-0 observed-garbage-b-0) + (make-vector 1000) + (lp))) + + (test-assert "Weak pair's car which pointed to first garbage contains broken weak pointer" (bwp-object? (car garbage-a-weak-ref))) + (test-assert "Weak pair's car in first garbage which pointed to collected object contains broken weak pointer" (bwp-object? observed-garbage-a-0)) + (test-equal "Weak pair's car in first garbage which pointed to retained object contains the original object" observed-garbage-a-1 held-onto-value) + (test-assert "Weak pair's car in first garbage which pointed to second garbage contains broken weak pointer" (bwp-object? observed-garbage-a-2)) + (test-assert "Weak pair's car which pointed to second garbage contains broken weak pointer" (bwp-object? (car garbage-b-weak-ref))) + (test-assert "Weak pair's car in second garbage which pointed to collected object contains broken weak pointer" (bwp-object? observed-garbage-b-0)) + (test-equal "Weak pair's car in second garbage which pointed to retained object contains the original object" observed-garbage-b-1 held-onto-value) + (test-assert "Weak pair's car in second garbage which pointed to first garbage contains broken weak pointer" (bwp-object? observed-garbage-b-2)))) + + +;; Safe version of locative-ref, returns !#bwp instead of raising an exception +(define (weak-locative-ref loc) + (condition-case (locative-ref loc) + ((exn type) #!bwp))) + +(test-group "Testing that weak locatives get invalidated before finalizing would-be garbage" + (gc #t) ; Improve chances we don't get a minor GC in between + (let* ((not-held-onto-value (vector (vector 42))) + (held-onto-value (vector (vector 99))) + (garbage-a (vector (make-weak-locative not-held-onto-value 0) (make-weak-locative held-onto-value 0) #f)) + (garbage-b (vector (make-weak-locative not-held-onto-value 0) (make-weak-locative held-onto-value 0) #f)) + + (garbage-a-weak-ref (make-weak-locative garbage-a 0)) + (garbage-b-weak-ref (make-weak-locative garbage-b 0)) + + (observed-garbage-a-0 #f) + (observed-garbage-a-1 #f) + (observed-garbage-a-2 #f) + (observed-garbage-b-0 #f) + (observed-garbage-b-1 #f) + (observed-garbage-b-2 #f)) + + ;; Garbage weakly references eachother + (vector-set! garbage-a 2 (make-weak-locative garbage-b 0)) + (vector-set! garbage-b 2 (make-weak-locative garbage-a 0)) + + (set-finalizer! garbage-a (lambda (vec) + (set! observed-garbage-a-0 (weak-locative-ref (vector-ref vec 0))) + (set! observed-garbage-a-1 (weak-locative-ref (vector-ref vec 1))) + (set! observed-garbage-a-2 (weak-locative-ref (vector-ref vec 2))))) + (set-finalizer! garbage-b (lambda (vec) + (set! observed-garbage-b-0 (weak-locative-ref (vector-ref vec 0))) + (set! observed-garbage-b-1 (weak-locative-ref (vector-ref vec 1))) + (set! observed-garbage-b-2 (weak-locative-ref (vector-ref vec 2))))) + + (set! not-held-onto-value #f) + (set! garbage-a #f) + (set! garbage-b #f) + + ;; Must be a major collection, finalizers don't get queued on minor GC + ;; (gc #t) + ;; NOTE: The above won't work because it triggers *another* GC after running finalizers, + ;; which would invalidate all weak pairs anyway. So instead, we create garbage until + ;; the finalizers have run. This is more like what happens in a regular program. + (let lp () + (unless (and observed-garbage-a-0 observed-garbage-b-0) + (make-vector 1000) + (lp))) + + (test-assert "Weak locative which pointed to first garbage contains broken weak pointer" (bwp-object? (weak-locative-ref garbage-a-weak-ref))) + (test-assert "Weak locative in first garbage which pointed to collected object contains broken weak pointer" (bwp-object? observed-garbage-a-0)) + (test-equal "Weak locative in first garbage which pointed to retained object contains the original object" observed-garbage-a-1 (vector-ref held-onto-value 0)) + (test-assert "Weak locative in first garbage which pointed to second garbage contains broken weak pointer" (bwp-object? observed-garbage-a-2)) + (test-assert "Weak locative which pointed to second garbage contains broken weak pointer" (bwp-object? (weak-locative-ref garbage-b-weak-ref))) + (test-assert "Weak locative in second garbage which pointed to collected object contains broken weak pointer" (bwp-object? observed-garbage-b-0)) + (test-equal "Weak locative in second garbage which pointed to retained object contains the original object" observed-garbage-b-1 (vector-ref held-onto-value 0)) + (test-assert "Weak locative in second garbage which pointed to first garbage contains broken weak pointer" (bwp-object? observed-garbage-b-2)))) + (test-exit)Trap