~ chicken-core (chicken-5) cb9c9171df6927c1eaf08dfcaeb70c395914b778


commit cb9c9171df6927c1eaf08dfcaeb70c395914b778
Author:     Peter Bex <peter@more-magic.net>
AuthorDate: Fri Jul 7 11:45:51 2023 +0200
Commit:     felix <felix@call-with-current-continuation.org>
CommitDate: Tue Jul 11 12:12:33 2023 +0200

    Don't retain weak references to finalizable objects
    
    When an object would be garbage, but has a finalizer, it will be
    artificially kept alive at the end of a GC cycle so that the finalizer
    gets a chance to run, with the original event as an argument.
    
    However, this object is effectively supposed to be garbage, and the
    finalizer may put the object in a defunct state.  This means we don't
    want code to be able to extract such an object through a weak
    reference, because it should be considered garbage and *might* be
    already cleaned up.  This could be a potential footgun, making use
    after free bugs more likely.
    
    To fix this, we make these "garbage" objects appear as garbage for the
    purpose of clearing weak pointers.
    
    The implementation is relatively trivial - we simply remember the heap
    top pointer before we revive garbage objects that have finalizers, and
    when dereferencing the pointer to the new object's location, where we
    check that the object is in the target heap, we also check that it's
    not in the portion of the heap where we've put the finalizable
    objects and anything they reference.
    
    Signed-off-by: felix <felix@call-with-current-continuation.org>

diff --git a/manual/Module (chicken gc) b/manual/Module (chicken gc)
index 48653e3a..25a29373 100644
--- a/manual/Module (chicken gc)	
+++ b/manual/Module (chicken gc)	
@@ -53,6 +53,12 @@ Multiple finalizers can be registered for the same object. The order
 in which the finalizers run is undefined. Execution of finalizers
 may be nested.
 
+NOTE: When a finalizable object has any weak references (i.e., weak
+locatives or weak pairs) to objects that are only reachable through it
+or other finalizable objects, those references will be broken like
+when the objects had already been collected.  This is done in order to
+avoid user code from accessing objects that are possibly in an
+invalid state.
 
 === force-finalizers
 
diff --git a/runtime.c b/runtime.c
index fbce94fd..6fdd7fca 100644
--- a/runtime.c
+++ b/runtime.c
@@ -543,8 +543,8 @@ static void C_fcall mark_nested_objects(C_byte *heap_scan_top, C_byte *tgt_space
 static void C_fcall mark_live_objects(C_byte *tgt_space_start, C_byte **tgt_space_top, C_byte *tgt_space_limit) C_regparm;
 static void C_fcall mark_live_heap_only_objects(C_byte *tgt_space_start, C_byte **tgt_space_top, C_byte *tgt_space_limit) C_regparm;
 static C_word C_fcall intern0(C_char *name) C_regparm;
-static void C_fcall update_weak_pairs(int mode) C_regparm;
-static void C_fcall update_locatives(int mode) C_regparm;
+static void C_fcall update_weak_pairs(int mode, C_byte *undead_start, C_byte *undead_end) C_regparm;
+static void C_fcall update_locatives(int mode, C_byte *undead_start, C_byte *undead_end) C_regparm;
 static LF_LIST *find_module_handle(C_char *name);
 static void set_profile_timer(C_uword freq);
 static void take_profile_sample();
@@ -3497,8 +3497,8 @@ C_regparm void C_fcall C_reclaim(void *trampoline, C_word c)
     count = (C_uword)C_fromspace_top - (C_uword)start;
     ++gc_count_1;
     ++gc_count_1_total;
-    update_locatives(GC_MINOR);
-    update_weak_pairs(GC_MINOR);
+    update_locatives(GC_MINOR, start, *tgt_space_top);
+    update_weak_pairs(GC_MINOR, start, *tgt_space_top);
   }
   else {
     /* Mark finalizer list and remember pointers to non-forwarded items: */
@@ -3585,8 +3585,8 @@ C_regparm void C_fcall C_reclaim(void *trampoline, C_word c)
       }
     }
 
-    update_locatives(gc_mode);
-    update_weak_pairs(gc_mode);
+    update_locatives(gc_mode, start, *tgt_space_top);
+    update_weak_pairs(gc_mode, start, *tgt_space_top);
 
     count = (C_uword)tospace_top - (C_uword)tospace_start; // Actual used, < heap_size/2
 
@@ -4007,8 +4007,8 @@ C_regparm void C_fcall C_rereclaim2(C_uword size, int relative_resize)
 
   /* Mark nested values in already moved (marked) blocks in breadth-first manner: */
   mark_nested_objects(start, new_tospace_start, &new_tospace_top, new_tospace_limit);
-  update_locatives(GC_REALLOC);
-  update_weak_pairs(GC_REALLOC);
+  update_locatives(GC_REALLOC, new_tospace_top, new_tospace_top);
+  update_weak_pairs(GC_REALLOC, new_tospace_top, new_tospace_top);
 
   heap_free (heapspace1, heapspace1_size);
   heap_free (heapspace2, heapspace2_size);
@@ -4053,10 +4053,11 @@ C_regparm void C_fcall C_rereclaim2(C_uword size, int relative_resize)
  * can *only* contain weak-pairs-turned-forwarding-pointer, we may
  * freely access the first slot of such forwarding pointers.
  */
-static C_regparm void C_fcall update_weak_pairs(int mode)
+static C_regparm void C_fcall update_weak_pairs(int mode, C_byte *undead_start, C_byte *undead_end)
 {
   int weakn = 0;
   C_word p, pair, car, h;
+  C_byte *car_ptr;
 
   /* NOTE: Don't use C_block_item() because it asserts the block is
    * big enough in DEBUGBUILD, but forwarding pointers have size 0.
@@ -4087,10 +4088,12 @@ static C_regparm void C_fcall update_weak_pairs(int mode)
       h = C_block_header(car);
     }
 
-    /* If the car is unreferenced by anyone else, it wasn't moved by GC.  So drop it: */
+    car_ptr = (C_byte *)(C_uword)car;
+    /* If the car is unreferenced by anyone else, it wasn't moved by GC.  Or, if it's in the "undead" portion of
+       the new heap, it was moved because it was only referenced by a revived finalizable object.  In either case, drop it: */
     if((mode == GC_MINOR && C_in_stackp(car)) ||
-       (mode == GC_MAJOR && (C_in_stackp(car) || C_in_fromspacep(car))) ||
-       (mode == GC_REALLOC && (C_in_stackp(car) || C_in_heapp(car)))) { /* NB: *old* heap! */
+       (mode == GC_MAJOR && (C_in_stackp(car) || C_in_fromspacep(car) || (car_ptr >= undead_start && car_ptr < undead_end))) ||
+       (mode == GC_REALLOC && (C_in_stackp(car) || C_in_heapp(car) || (car_ptr >= undead_start && car_ptr < undead_end)))) { /* NB: *old* heap! */
 
       C_set_block_item(pair, 0, C_SCHEME_BROKEN_WEAK_PTR);
       ++weakn;
@@ -4109,10 +4112,11 @@ static C_regparm void C_fcall update_weak_pairs(int mode)
  * so the updating of that pointer is not handled by the GC proper
  * (which only deals with full objects).
  */
-static C_regparm void C_fcall update_locatives(int mode)
+static C_regparm void C_fcall update_locatives(int mode, C_byte *undead_start, C_byte *undead_end)
 {
   int weakn = 0;
   C_word p, loc, ptr, obj, h, offset;
+  C_byte *obj_ptr;
 
   for (p = locative_chain; p != (C_word)NULL; p = *((C_word *)C_data_pointer(p))) {
     h = C_block_header(p);
@@ -4136,10 +4140,12 @@ static C_regparm void C_fcall update_locatives(int mode)
       h = C_block_header(obj);
     }
 
-    /* If the object is unreferenced by anyone else, it wasn't moved by GC.  So drop it: */
+    obj_ptr = (C_byte *)(C_uword)obj;
+    /* If the object is unreferenced by anyone else, it wasn't moved by GC.  Or, if it's in the "undead" portion of
+       the new heap, it was moved because it was only referenced by a revived finalizable object.  In either case, drop it: */
     if((mode == GC_MINOR && C_in_stackp(obj)) ||
-       (mode == GC_MAJOR && (C_in_stackp(obj) || C_in_fromspacep(obj))) ||
-       (mode == GC_REALLOC && (C_in_stackp(obj) || C_in_heapp(obj)))) { /* NB: *old* heap! */
+       (mode == GC_MAJOR && (C_in_stackp(obj) || C_in_fromspacep(obj) || (obj_ptr >= undead_start && obj_ptr < undead_end))) ||
+       (mode == GC_REALLOC && (C_in_stackp(obj) || C_in_heapp(obj) || (obj_ptr >= undead_start && obj_ptr < undead_end)))) { /* NB: *old* heap! */
 
       /* NOTE: This does *not* use BROKEN_WEAK_POINTER.  This slot
        * holds an unaligned raw C pointer, not a Scheme object */
diff --git a/tests/weak-pointer-test.scm b/tests/weak-pointer-test.scm
index d13ccfe2..9cb78312 100644
--- a/tests/weak-pointer-test.scm
+++ b/tests/weak-pointer-test.scm
@@ -204,4 +204,117 @@
     (test-error "Fifth locative is reclaimed" (locative-ref loc5))
     (test-equal "Sixth locative is NOT reclaimed" (locative-ref loc6) (vector-ref nested-held-onto-value 1))))
 
+
+(test-group "Testing that weak pairs get invalidated before finalizing would-be garbage"
+  (gc #t) ; Improve chances we don't get a minor GC in between
+  (let* ((not-held-onto-value (vector 42))
+	 (held-onto-value (vector 99))
+	 (garbage-a (vector (weak-cons not-held-onto-value '()) (weak-cons held-onto-value '()) #f))
+	 (garbage-b (vector (weak-cons not-held-onto-value '()) (weak-cons held-onto-value '()) #f))
+
+	 (garbage-a-weak-ref (weak-cons garbage-a '()))
+	 (garbage-b-weak-ref (weak-cons garbage-b '()))
+
+	 (observed-garbage-a-0 #f)
+	 (observed-garbage-a-1 #f)
+	 (observed-garbage-a-2 #f)
+	 (observed-garbage-b-0 #f)
+	 (observed-garbage-b-1 #f)
+	 (observed-garbage-b-2 #f))
+
+    ;; Garbage weakly references eachother
+    (vector-set! garbage-a 2 (weak-cons garbage-b '()))
+    (vector-set! garbage-b 2 (weak-cons garbage-a '()))
+
+    (set-finalizer! garbage-a (lambda (vec)
+				(set! observed-garbage-a-0 (car (vector-ref vec 0)))
+				(set! observed-garbage-a-1 (car (vector-ref vec 1)))
+				(set! observed-garbage-a-2 (car (vector-ref vec 2)))))
+    (set-finalizer! garbage-b (lambda (vec)
+				(set! observed-garbage-b-0 (car (vector-ref vec 0)))
+				(set! observed-garbage-b-1 (car (vector-ref vec 1)))
+				(set! observed-garbage-b-2 (car (vector-ref vec 2)))))
+
+    (set! not-held-onto-value #f)
+    (set! garbage-a #f)
+    (set! garbage-b #f)
+
+    ;; Must be a major collection, finalizers don't get queued on minor GC
+    ;; (gc #t)
+    ;; NOTE: The above won't work because it triggers *another* GC after running finalizers,
+    ;; which would invalidate all weak pairs anyway.  So instead, we create garbage until
+    ;; the finalizers have run.  This is more like what happens in a regular program.
+    (let lp ()
+      (unless (and observed-garbage-a-0 observed-garbage-b-0)
+	(make-vector 1000)
+	(lp)))
+
+    (test-assert "Weak pair's car which pointed to first garbage contains broken weak pointer" (bwp-object? (car garbage-a-weak-ref)))
+    (test-assert "Weak pair's car in first garbage which pointed to collected object contains broken weak pointer" (bwp-object? observed-garbage-a-0))
+    (test-equal "Weak pair's car in first garbage which pointed to retained object contains the original object" observed-garbage-a-1 held-onto-value)
+    (test-assert "Weak pair's car in first garbage which pointed to second garbage contains broken weak pointer" (bwp-object? observed-garbage-a-2))
+    (test-assert "Weak pair's car which pointed to second garbage contains broken weak pointer" (bwp-object? (car garbage-b-weak-ref)))
+    (test-assert "Weak pair's car in second garbage which pointed to collected object contains broken weak pointer" (bwp-object? observed-garbage-b-0))
+    (test-equal "Weak pair's car in second garbage which pointed to retained object contains the original object" observed-garbage-b-1 held-onto-value)
+    (test-assert "Weak pair's car in second garbage which pointed to first garbage contains broken weak pointer" (bwp-object? observed-garbage-b-2))))
+
+
+;; Safe version of locative-ref, returns !#bwp instead of raising an exception
+(define (weak-locative-ref loc)
+  (condition-case (locative-ref loc)
+    ((exn type) #!bwp)))
+
+(test-group "Testing that weak locatives get invalidated before finalizing would-be garbage"
+  (gc #t) ; Improve chances we don't get a minor GC in between
+  (let* ((not-held-onto-value (vector (vector 42)))
+	 (held-onto-value (vector (vector 99)))
+	 (garbage-a (vector (make-weak-locative not-held-onto-value 0) (make-weak-locative held-onto-value 0) #f))
+	 (garbage-b (vector (make-weak-locative not-held-onto-value 0) (make-weak-locative held-onto-value 0) #f))
+
+	 (garbage-a-weak-ref (make-weak-locative garbage-a 0))
+	 (garbage-b-weak-ref (make-weak-locative garbage-b 0))
+
+	 (observed-garbage-a-0 #f)
+	 (observed-garbage-a-1 #f)
+	 (observed-garbage-a-2 #f)
+	 (observed-garbage-b-0 #f)
+	 (observed-garbage-b-1 #f)
+	 (observed-garbage-b-2 #f))
+
+    ;; Garbage weakly references eachother
+    (vector-set! garbage-a 2 (make-weak-locative garbage-b 0))
+    (vector-set! garbage-b 2 (make-weak-locative garbage-a 0))
+
+    (set-finalizer! garbage-a (lambda (vec)
+				(set! observed-garbage-a-0 (weak-locative-ref (vector-ref vec 0)))
+				(set! observed-garbage-a-1 (weak-locative-ref (vector-ref vec 1)))
+				(set! observed-garbage-a-2 (weak-locative-ref (vector-ref vec 2)))))
+    (set-finalizer! garbage-b (lambda (vec)
+				(set! observed-garbage-b-0 (weak-locative-ref (vector-ref vec 0)))
+				(set! observed-garbage-b-1 (weak-locative-ref (vector-ref vec 1)))
+				(set! observed-garbage-b-2 (weak-locative-ref (vector-ref vec 2)))))
+
+    (set! not-held-onto-value #f)
+    (set! garbage-a #f)
+    (set! garbage-b #f)
+
+    ;; Must be a major collection, finalizers don't get queued on minor GC
+    ;; (gc #t)
+    ;; NOTE: The above won't work because it triggers *another* GC after running finalizers,
+    ;; which would invalidate all weak pairs anyway.  So instead, we create garbage until
+    ;; the finalizers have run.  This is more like what happens in a regular program.
+    (let lp ()
+      (unless (and observed-garbage-a-0 observed-garbage-b-0)
+	(make-vector 1000)
+	(lp)))
+
+    (test-assert "Weak locative which pointed to first garbage contains broken weak pointer" (bwp-object? (weak-locative-ref garbage-a-weak-ref)))
+    (test-assert "Weak locative in first garbage which pointed to collected object contains broken weak pointer" (bwp-object? observed-garbage-a-0))
+    (test-equal "Weak locative in first garbage which pointed to retained object contains the original object" observed-garbage-a-1 (vector-ref held-onto-value 0))
+    (test-assert "Weak locative in first garbage which pointed to second garbage contains broken weak pointer" (bwp-object? observed-garbage-a-2))
+    (test-assert "Weak locative which pointed to second garbage contains broken weak pointer" (bwp-object? (weak-locative-ref garbage-b-weak-ref)))
+    (test-assert "Weak locative in second garbage which pointed to collected object contains broken weak pointer" (bwp-object? observed-garbage-b-0))
+    (test-equal "Weak locative in second garbage which pointed to retained object contains the original object" observed-garbage-b-1 (vector-ref held-onto-value 0))
+    (test-assert "Weak locative in second garbage which pointed to first garbage contains broken weak pointer" (bwp-object? observed-garbage-b-2))))
+
 (test-exit)
Trap