~ chicken-core (chicken-5) 7f5f51356040cd8a1c0a7f9243214bf4c3ba757a


commit 7f5f51356040cd8a1c0a7f9243214bf4c3ba757a
Author:     Peter Bex <peter@more-magic.net>
AuthorDate: Fri Jun 16 19:54:42 2017 +0200
Commit:     felix <felix@call-with-current-continuation.org>
CommitDate: Thu Jun 22 14:15:19 2017 +0200

    Fix some edge cases with symbol GC
    
    Symbols are never statically allocated, their name strings are, so the
    permanentp() check in update_symbol_tables was bogus.  It might
    trigger collection of a symbol even though it would be statically
    allocated.
    
    This could potentially cause problems when generated C code accessed a
    symbol global through lf[...], for example, because its symbol would
    have been collected.
    
    Instead, we now ensure persistence in add_symbol, based on whether the
    symbol's string name is in non-GCable memory, and C_i_unpersist_symbol
    will now also check the string for being GCable before unpersisting.
    
    This commit also adds a very paranoid check to update_symbol_tables
    which detects such edge cases.  This check found more edge cases:
    
    - When a symbol is immediately given a value in C_intern3, it should
       also be persisted.
    - If a static symbol is to be generated by C_h_intern, but it has
       already been created in the heap (a practical example is the
       pending-finalizers symbol), there is no guarantee that it will
       stick around after GC.  So, if a symbol is found already in the
       symbol table, and its string name isn't statically allocated,
       we replace it by a newly allocated static string, to ensure that
       the code which created it won't allow the symbol to get GCed.
    - The code in eval.scm had a problem if a GC happened in between
       persisting the symbol and actually assigning the value.  This is
       not a problem in practice, but persisting it after calculating
       the value is cleaner.  We also use the inline operator for
       performance and to avoid a GC in between persisting and assigning.
    
    Signed-off-by: felix <felix@call-with-current-continuation.org>

diff --git a/eval.scm b/eval.scm
index 2c62136e..411fb9c5 100644
--- a/eval.scm
+++ b/eval.scm
@@ -269,8 +269,9 @@
 					     (lambda (v)
 					       (##sys#error 'eval "environment is not mutable" evalenv var)) ;XXX var?
 					     (lambda (v)
-					       (##sys#persist-symbol var)
-					       (##sys#setslot var 0 (##core#app val v))))))
+					       (let ((result (##core#app val v)))
+						 (##core#inline "C_i_persist_symbol" var)
+						 (##sys#setslot var 0 result))))))
 				      ((zero? i) (lambda (v) (##sys#setslot (##sys#slot v 0) j (##core#app val v))))
 				      (else
 				       (lambda (v)
diff --git a/library.scm b/library.scm
index 9da4ef93..7f0d60ad 100644
--- a/library.scm
+++ b/library.scm
@@ -275,7 +275,6 @@ EOF
 (define ##sys#gc (##core#primitive "C_gc"))
 (define (##sys#setslot x i y) (##core#inline "C_i_setslot" x i y))
 (define (##sys#setislot x i y) (##core#inline "C_i_set_i_slot" x i y))
-(define (##sys#persist-symbol s) (##core#inline "C_i_persist_symbol" s))
 (define ##sys#allocate-vector (##core#primitive "C_allocate_vector"))
 (define (argc+argv) (##sys#values main_argc main_argv))
 (define ##sys#make-structure (##core#primitive "C_make_structure"))
diff --git a/runtime.c b/runtime.c
index 1d2e7502..64020e24 100644
--- a/runtime.c
+++ b/runtime.c
@@ -2279,8 +2279,11 @@ C_regparm C_word C_fcall C_intern_in(C_word **ptr, int len, C_char *str, C_SYMBO
 
 C_regparm C_word C_fcall C_h_intern_in(C_word *slot, int len, C_char *str, C_SYMBOL_TABLE *stable)
 {
-  /* Intern as usual, but remember slot, if looked up symbol is in nursery.
-     also: allocate in static memory. */
+  /* Intern as usual, but remember slot, and allocate in static
+   * memory.  If symbol already exists, replace its string by a fresh
+   * statically allocated string to ensure it never gets collected, as
+   * lf[] entries are not tracked by the GC.
+   */
   int key;
   C_word s;
 
@@ -2291,6 +2294,11 @@ C_regparm C_word C_fcall C_h_intern_in(C_word *slot, int len, C_char *str, C_SYM
   if(C_truep(s = lookup(key, len, str, stable))) {
     if(C_in_stackp(s)) C_mutate_slot(slot, s);
     
+    if(!C_truep(C_permanentp(C_symbol_name(s)))) {
+      /* Replace by statically allocated string, and persist it */
+      C_set_block_item(s, 1, C_static_string(C_heaptop, len, str));
+      C_i_persist_symbol(s);
+    }
     return s;
   }
 
@@ -2333,6 +2341,7 @@ C_regparm C_word C_fcall C_intern3(C_word **ptr, C_char *str, C_word value)
   C_word s = C_intern_in(ptr, C_strlen(str), str, symbol_table);
   
   C_mutate2(&C_block_item(s,0), value);
+  C_i_persist_symbol(s); /* Symbol has a value now; persist it */
   return s;
 }
 
@@ -2385,7 +2394,8 @@ C_regparm C_word C_fcall C_i_persist_symbol(C_word sym)
 }
 
 /* Possibly remove "persistence" of symbol, to allowed it to be GC'ed.
- * This is only done if the symbol is unbound and has an empty plist.
+ * This is only done if the symbol is unbound, has an empty plist and
+ * is allocated in managed memory.
  */
 C_regparm C_word C_fcall C_i_unpersist_symbol(C_word sym)
 {
@@ -2393,7 +2403,10 @@ C_regparm C_word C_fcall C_i_unpersist_symbol(C_word sym)
 
   C_i_check_symbol(sym);
 
-  if (C_persistable_symbol(sym)) return C_SCHEME_FALSE;
+  if (C_persistable_symbol(sym) ||
+      C_truep(C_permanentp(C_symbol_name(sym)))) {
+    return C_SCHEME_FALSE;
+  }
 
   bucket = lookup_bucket(sym, NULL);
   if (C_truep(bucket)) { /* It could be an uninterned symbol(?) */
@@ -2464,7 +2477,13 @@ C_word add_symbol(C_word **ptr, C_word key, C_word string, C_SYMBOL_TABLE *stabl
   C_set_block_item(sym, 2, C_SCHEME_END_OF_LIST);
   *ptr = p;
   b2 = stable->table[ key ];	/* previous bucket */
-  bucket = C_a_weak_pair(ptr, sym, b2); /* create new bucket */
+
+  /* Create new weak or strong bucket depending on persistability */
+  if (C_persistable_symbol(sym) || C_truep(C_permanentp(string))) {
+    bucket = C_a_pair(ptr, sym, b2);
+  } else {
+    bucket = C_a_weak_pair(ptr, sym, b2);
+  }
 
   if(ptr != C_heaptop) C_mutate_slot(&stable->table[ key ], bucket);
   else {
@@ -4138,10 +4157,41 @@ C_regparm void C_fcall update_symbol_tables(int mode)
 
 	assert((h & C_HEADER_TYPE_BITS) == C_SYMBOL_TYPE);
 
+#ifdef DEBUGBUILD
+        /* Detect inconsistencies before dropping / keeping the symbol */
+	{
+	  C_word str = C_symbol_name(sym);
+          int str_perm;
+
+	  h = C_block_header(str);
+
+	  while(is_fptr(h)) {
+	    str = fptr_to_ptr(h);
+	    h = C_block_header(str);
+	  }
+
+          str_perm = !C_in_stackp(str) && !C_in_heapp(str) &&
+                  !C_in_scratchspacep(str) &&
+                  (mode == GC_REALLOC ? !C_in_new_heapp(str) : 1);
+
+	  if ((C_persistable_symbol(sym) || str_perm) &&
+              (C_block_header(bucket) == C_WEAK_PAIR_TAG)) {
+	    C_dbg(C_text("GC"), C_text("Offending symbol: `%.*s'\n"),
+		  (int)C_header_size(str), C_c_string(str));
+	    panic(C_text("Persistable symbol found in weak pair"));
+	  } else if (!C_persistable_symbol(sym) && !str_perm &&
+		     (C_block_header(bucket) == C_PAIR_TAG)) {
+	    C_dbg(C_text("GC"), C_text("Offending symbol: `%.*s'...\n"),
+		  (int)C_header_size(str), C_c_string(str));
+	    panic(C_text("Unpersistable symbol found in strong pair"));
+	  }
+	}
+#endif
+
 	/* If the symbol is unreferenced, drop it: */
-	if(!C_truep(C_permanentp(sym)) && (mode == GC_REALLOC ?
-					   !C_in_new_heapp(sym) :
-					   !C_in_fromspacep(sym))) {
+	if(mode == GC_REALLOC ?
+           !C_in_new_heapp(sym) :
+           !C_in_fromspacep(sym)) {
 
 	  if(last) C_set_block_item(last, 1, C_block_item(bucket,1));
 	  else stp->table[ i ] = C_block_item(bucket,1);
Trap