~ chicken-core (chicken-5) 62fcf64a9d6f311730abfae7c664b1a29950f318


commit 62fcf64a9d6f311730abfae7c664b1a29950f318
Author:     Peter Bex <peter@more-magic.net>
AuthorDate: Fri Jun 9 08:17:13 2023 +0200
Commit:     Peter Bex <peter@more-magic.net>
CommitDate: Fri Jun 9 08:17:13 2023 +0200

    Enable minor collection of weak pairs
    
    In the initial weak symbol GC implementation, we added a temporary
    hack in mark_nested_objects() to skip weak pairs in minor GC.
    
    This was required because back then, we still had to scan the entire
    symbol table in order to correctly drop GCed symbols.  Now that we're
    not doing that anymore, we can collect weak pairs in minor GCs too.
    This allows us to drop said hack.  We also can drop an extra check in
    really_mark which was added to prevent adding weak pairs to the weak
    pair chain in minor GC.
    
    Drop the assert in update_weak_pairs and refactor the liveness checks
    to be more comprehensive (the old checks didn't account for pairs
    allocated in static memory or malloced memory - not likely, but might
    be possible now they're user-facing), and to include stack checks for
    the minor GC.
    
    Change the weak pointer tests not to do major collections, and add
    (slightly paranoid) checks that "permanent" symbols are kept around.
    They're paranoid because permanent symbols don't actually live in
    static memory - only their strings do.  So they should not result
    in broken weak pointers anyway.
    
    While working on this, I noticed that there's a small but odd
    limitation - when using set-car! on a weak pair, depending on where
    the pair and the car value live, we add it to the mutation stack,
    which gets marked unconditionally.  Therefore, the value will be kept
    alive on the next GC even if nothing else holds onto it.
    
    To avoid this, we would need to know in C_mutate whether the slot
    we're mutating is a GC-managed slot (ie it is not the first slot of a
    "special block").  We don't have this information there, and making it
    available would entail a massive API change.  This is not worth the
    hassle, so document this minor limitation, instead.

diff --git a/manual/Module (chicken base) b/manual/Module (chicken base)
index d26ba5ad..11d9c441 100644
--- a/manual/Module (chicken base)	
+++ b/manual/Module (chicken base)	
@@ -176,6 +176,10 @@ They're the same as regular pairs for all intents and purposes.
 However, there's a {{weak-pair?}} predicate which ''can'' distinguish
 between regular pairs and weak pairs.
 
+NOTE: Due to internal limitations, {{set-car!}} on a weak pair
+currently may cause it to hold onto the value for one more GC cycle in
+some situations.
+
 ==== weak-cons
 
 <procedure>(weak-cons obj[1] obj[2])</procedure><br>
diff --git a/runtime.c b/runtime.c
index 755a9bec..5bbf2760 100644
--- a/runtime.c
+++ b/runtime.c
@@ -3503,6 +3503,7 @@ C_regparm void C_fcall C_reclaim(void *trampoline, C_word c)
     ++gc_count_1;
     ++gc_count_1_total;
     update_locative_table(GC_MINOR);
+    update_weak_pairs(GC_MINOR);
   }
   else {
     /* Mark finalizer list and remember pointers to non-forwarded items: */
@@ -3818,11 +3819,8 @@ static C_regparm void C_fcall mark_nested_objects(C_byte *heap_scan_top, C_byte
 
     if(n > 0 && (h & C_BYTEBLOCK_BIT) == 0) {
       if(h & C_SPECIALBLOCK_BIT) {
-        /* Minor GC needs to be fast; always mark weakly held symbols */
-        if (gc_mode != GC_MINOR || h != C_WEAK_PAIR_TAG) {
-	  --n;
-	  ++p;
-        }
+	--n;
+	++p;
       }
 
       while(n--) mark(p++);
@@ -3906,7 +3904,7 @@ static C_regparm void C_fcall really_mark(C_word *x, C_byte *tgt_space_start, C_
   p2->header = h;
   p->header = ptr_to_fptr((C_uword)p2);
   C_memcpy(p2->data, p->data, bytes);
-  if (h == C_WEAK_PAIR_TAG && gc_mode != GC_MINOR && !C_immediatep(p2->data[0])) {
+  if (h == C_WEAK_PAIR_TAG && !C_immediatep(p2->data[0])) {
     p->data[0] = weak_pair_chain; /* "Recycle" the weak pair's CAR to point to prev head */
     weak_pair_chain = (C_word)p;  /* Make this fwd ptr the new head of the weak pair chain */
   }
@@ -4167,17 +4165,6 @@ static C_regparm void C_fcall update_weak_pairs(int mode)
   int weakn = 0;
   C_word p, pair, car, h;
 
-  if(gc_mode == GC_MINOR) {
-    /* For now, we always mark weak pairs in major/realloc GC.
-     * Perhaps we can drop that constraint if we move the pruning of
-     * symbol table buckets to C_h_intern()/lookup().
-     * Then only weak pairs in the nursery get put in the chain,
-     * which should be very few.  NOTE: What about mutation?
-     */
-    assert(weak_pair_chain == (C_word)NULL);
-    return;
-  }
-
   /* NOTE: Don't use C_block_item() because it asserts the block is
    * big enough in DEBUGBUILD, but forwarding pointers have size 0.
    */
@@ -4188,22 +4175,25 @@ static C_regparm void C_fcall update_weak_pairs(int mode)
       h = C_block_header(pair);
     } while (is_fptr(h));
 
-    assert((mode == GC_REALLOC ?
-            C_in_new_heapp(pair) :
-            !C_in_fromspacep(pair)));
+    /* The pair itself should be live */
+    assert((mode == GC_MINOR && !C_in_stackp(pair)) ||
+           (mode == GC_MAJOR && !C_in_stackp(pair) && !C_in_fromspacep(pair)) ||
+           (mode == GC_REALLOC && !C_in_stackp(pair) && !C_in_heapp(pair))); /* NB: *old* heap! */
 
     car = C_block_item(pair, 0);
-    assert(!C_immediatep(car));
+    if (car == C_SCHEME_BROKEN_WEAK_PTR) continue; /* Already processed (should not happen!) */
+
+    assert(!C_immediatep(car)); /* should be ensured when adding it to the chain */
     h = C_block_header(car);
     while (is_fptr(h)) {
       car = fptr_to_ptr(h);
       h = C_block_header(car);
     }
 
-    /* If the car is unreferenced, drop it: */
-    if(mode == GC_REALLOC ?
-       !C_in_new_heapp(car) :
-       C_in_fromspacep(car)) {
+    /* If the car is unreferenced by anyone else, it wasn't moved by GC.  So drop it: */
+    if((mode == GC_MINOR && C_in_stackp(car)) ||
+       (mode == GC_MAJOR && (C_in_stackp(car) || C_in_fromspacep(car))) ||
+       (mode == GC_REALLOC && (C_in_stackp(car) || C_in_heapp(car)))) { /* NB: *old* heap! */
 
       C_set_block_item(pair, 0, C_SCHEME_BROKEN_WEAK_PTR);
       ++weakn;
diff --git a/tests/weak-pointer-test.scm b/tests/weak-pointer-test.scm
index fa151e77..e53f2ade 100644
--- a/tests/weak-pointer-test.scm
+++ b/tests/weak-pointer-test.scm
@@ -43,16 +43,19 @@
       (test-equal "an improper weak list is read back as regular improper list" my-improper-list reread-improper-weak-list equal?))))
 
 (test-group "Testing that basic weak pairs get their car reclaimed"
+  (gc #t) ; Improve chances we don't get a minor GC in between
   (let* ((not-held-onto-value (vector 42))
 	 (held-onto-vector (vector 'this-one-stays))
+
 	 (weak-list (weak-cons not-held-onto-value
 			       (weak-cons (vector 'ohai)
-					  (weak-cons held-onto-vector '())))))
+					  (weak-cons held-onto-vector '()))))
+	 (weak-immediate-pair (weak-cons 1 2)))
 
     ;; break other references to the values
     (set! not-held-onto-value #f)
 
-    (gc #t)
+    (gc)
 
     ;; First item is reclaimed
     (test-assert "first item of weak list is reclaimed" (not (vector? (car weak-list))))
@@ -65,10 +68,37 @@
     ;; Third item stays
     (test-assert "third item of weak list is kept around due to other references existing" (vector? (caddr weak-list)))
     (test-equal "third item of weak list is identical to the other reference" (caddr weak-list) held-onto-vector)
-    (test-assert "third item of weak list is not set to the broken-weak-pointer object" (not (bwp-object? (caddr weak-list))))))
+    (test-assert "third item of weak list is not set to the broken-weak-pointer object" (not (bwp-object? (caddr weak-list))))
+
+    (test-equal "weak car is kept around when value is an immediate" (car weak-immediate-pair) 1)
+    (test-equal "weak cdr is kept around when value is an immediate" (cdr weak-immediate-pair) 2)))
+
+
+(test-group "Testing that weak pairs do not get broken when holding permanent symbols"
+  (gc #t) ; Improve chances we don't get a minor GC in between
+
+  ;; NOTE: When we don't use string-append here, the strings somehow get interned as (permanent) symbols?!
+  ;; Perhaps this is somehow caused by the reader.
+  (let* ((sym1 (string->symbol (string-append "something" "1234")))
+	 (sym2 (string->symbol (string-append "another" "1234")))
+	 (weak-permanent-symbol-pair (weak-cons 'scheme#car 'scheme#cdr))
+	 (weak-impermanent-symbol-pair (weak-cons sym1 sym2)))
+
+    (set! sym1 #f)
+    (set! sym2 #f)
+
+    (gc)
+
+    (test-equal "weak car is kept around when value is a \"permanent\" symbol" (car weak-permanent-symbol-pair) 'scheme#car)
+    (test-equal "weak cdr is kept around when value is a \"permanent\" symbol" (cdr weak-permanent-symbol-pair) 'scheme#cdr)
+
+    (test-assert "weak car is reclaimed when value is an \"impermanent\" symbol" (not (symbol? (car weak-impermanent-symbol-pair))))
+    (test-assert "weak car is reclaimed when value is an \"impermanent\" symbol" (bwp-object? (car weak-impermanent-symbol-pair)))
+    (test-equal "weak cdr is kept around when value is a \"impermanent\" symbol" (cdr weak-impermanent-symbol-pair) (string->symbol (string-append "an" "other1234")))))
 
 
 (test-group "Testing cars of weak pairs referenced by their cdr do not get collected"
+  (gc #t) ; Improve chances we don't get a minor GC in between
   (let* ((obj-a (vector 42))
 	 (ref-a (weak-cons obj-a obj-a))
 	 (obj-b (vector 'ohai))
@@ -80,7 +110,7 @@
     (set! obj-a #f)
     (set! obj-b #f)
 
-    (gc #t)
+    (gc)
 
     (test-assert "object in first weak cons is still kept around in car" (vector? (car ref-a)))
     (test-assert "object in first weak cons is still kept around in cdr" (vector? (cdr ref-a)))
Trap