~ chicken-core (chicken-5) c73667e4d79e082eb90309d2d3ab53eb63f9cfad


commit c73667e4d79e082eb90309d2d3ab53eb63f9cfad
Author:     Peter Bex <peter@more-magic.net>
AuthorDate: Sun Jan 6 16:36:54 2019 +0100
Commit:     Peter Bex <peter@more-magic.net>
CommitDate: Sat Jan 12 15:25:17 2019 +0100

    When interning "qualified" symbols, convert them to regular symbols
    
    This is a preparatory step in fixing #1077.  Once we have a
    bootstrapping compiler that reads qualified symbols and interns them
    as "##foo#bar" instead of as "\003foobar", it will also no longer emit
    code with strings that are encoded in the old style.
    
    The hack must be in the interning step so that the new runtime will
    still be compatible with an old compiler: that compiler will still
    generate code like "\003foobar".  Without this hack, we'd get errors
    like "\003syscar is unbound" if we'd only change the reading of these
    symbols.
    
    Luckily, uninterned symbols don't matter for this.
    
    This is a total hack which is also very wasteful and dumb because it
    will malloc a new string whenever it encounters a qualified symbol.
    But that's only in the intermediate bootstrapping compiler: after that,
    when compiling a new CHICKEN, there should be no more dequalification
    happening.
    
    Signed-off-by: felix <felix@call-with-current-continuation.org>

diff --git a/NEWS b/NEWS
index 7d5f4d19..957760ad 100644
--- a/NEWS
+++ b/NEWS
@@ -27,6 +27,10 @@
     specification and also give sensible results on Windows.
   - Fix get-environment from (chicken process-context) to raise an
      error when passed #f instead of segfaulting.
+  - Qualified symbols (##foo#bar style) are no longer encoded by a
+     byte prefix inside the symbol name.  This ensures read-write
+     invariance of symbols which start with a low-byte character
+     (fixes #1077, except for keywords, which start with NUL bytes).
 
 
 5.0.0
diff --git a/library.scm b/library.scm
index 0891f6a4..d7dc35ad 100644
--- a/library.scm
+++ b/library.scm
@@ -2674,6 +2674,7 @@ EOF
   (##sys#intern-symbol str) )
 
 (define ##sys#symbol->string)
+;; DEPRECATED: Remove this once we have a new bootstrapping compiler
 (define ##sys#symbol->qualified-string)
 (define ##sys#qualified-symbol-prefix)
 
@@ -2693,6 +2694,7 @@ EOF
 	     [i (split str len)] )
 	(if i (##sys#substring str i len) str) ) ) )
 
+  ;; DEPRECATED: Remove this once we have a new bootstrapping compiler
   (set! ##sys#symbol->qualified-string 
     (lambda (s)
       (let* ([str (##sys#slot s 1)]
@@ -2702,6 +2704,7 @@ EOF
 	    (string-append "##" (##sys#substring str 1 i) "#" (##sys#substring str i len))
 	    str) ) ) )
 
+  ;; DEPRECATED: Remove this once we have a new bootstrapping compiler
   (set! ##sys#qualified-symbol-prefix 
     (lambda (s)
       (let* ([str (##sys#slot s 1)]
@@ -2709,18 +2712,12 @@ EOF
 	     [i (split str len)] )
 	(and i (##sys#substring str 0 i)) ) ) ) )
 
+;; DEPRECATED: Remove this once we have a new bootstrapping compiler
 (define (##sys#qualified-symbol? s)
   (let ((str (##sys#slot s 1)))
     (and (fx> (##sys#size str) 0)
 	 (fx<= (##sys#byte str 0) namespace-max-id-len))))
 
-(define ##sys#string->qualified-symbol
-  (lambda (prefix str)
-    (##sys#string->symbol
-     (if prefix
-	 (##sys#string-append prefix str)
-	 str) ) ) )
-
 (set! scheme#symbol->string
   (lambda (s)
     (##sys#check-symbol s 'symbol->string)
@@ -3710,7 +3707,6 @@ EOF
 
 (define ##sys#default-read-info-hook #f)
 (define ##sys#read-error-with-line-number #f)
-(define ##sys#enable-qualifiers #t)
 (define (##sys#read-prompt-hook) #f)	; just here so that srfi-18 works without eval
 (define (##sys#infix-list-hook lst) lst)
 
@@ -4152,24 +4148,8 @@ EOF
 			    (loop i) ) ) ) ) ) )
 
 	  (define (r-ext-symbol)
-	    (let* ([p (##sys#make-string 1)]
-		   [tok (r-token)] 
-		   [toklen (##sys#size tok)] )
-	      (unless ##sys#enable-qualifiers 
-		(##sys#read-error port "qualified symbol syntax is not allowed" tok) )
-	      (let loop ([i 0])
-		(cond [(fx>= i toklen)
-		       (##sys#read-error port "invalid qualified symbol syntax" tok) ]
-		      [(fx= (##sys#byte tok i) (char->integer #\#))
-		       (when (fx> i namespace-max-id-len)
-			 (set! tok (##sys#substring tok 0 namespace-max-id-len)) )
-		       (##sys#setbyte p 0 i)
-		       (##sys#intern-symbol
-			(string-append
-			 p 
-			 (##sys#substring tok 0 i)
-			 (##sys#substring tok (fx+ i 1) toklen)) ) ]
-		      [else (loop (fx+ i 1))] ) ) ) )
+	    (let ((tok (r-token)))
+	      (build-symbol (string-append "##" tok))))
 
 	  (define (build-symbol tok)
 	    (##sys#intern-symbol tok) )
@@ -4556,7 +4536,12 @@ EOF
 				      (eq? c #\-) )
 				  (not (##sys#string->number str)) )
 				 ((eq? c #\:) (not (eq? ksp #:prefix)))
-				 ((eq? c #\#) ;; #!rest, #!key etc
+				 ((and (eq? c #\#)
+				       ;; Not a qualified symbol?
+				       (not (and (fx> len 2)
+						 (eq? (##core#inline "C_subchar" str 1) #\#)
+						 (not (eq? (##core#inline "C_subchar" str 2) #\#)))))
+				  ;; #!rest, #!key etc
 				  (eq? (##core#inline "C_subchar" str 1) #\!))
 				 ((specialchar? c) #f)
 				 (else #t) ) )
diff --git a/repl.scm b/repl.scm
index 7d7ef771..4ec97efa 100644
--- a/repl.scm
+++ b/repl.scm
@@ -146,7 +146,6 @@
 		   (##sys#reset-handler
 		    (lambda ()
 		      (set! ##sys#read-error-with-line-number #f)
-		      (set! ##sys#enable-qualifiers #t)
 		      (resetports)
 		      (c #f)))))
 		(##sys#read-prompt-hook)
diff --git a/runtime.c b/runtime.c
index 2a5415fa..1ac6e4f8 100644
--- a/runtime.c
+++ b/runtime.c
@@ -597,6 +597,45 @@ C_dbg(C_char *prefix, C_char *fstr, ...)
   va_end(va);
 }
 
+/*
+ * Dequalify symbol string if necessary.  This is a temporary hack to
+ * ensure that all interned symbols are in the literal ##foo#bar
+ * style.  This enforces compatibility between a new runtime and code
+ * compiled by an older compiler which still generates \003foobar
+ * literal symbols.  This transition is needed to fix #1077.  Because
+ * of its temporary nature (ideally we just build a new bootstrapping
+ * compiler with this in which the hack should have no effect), we can
+ * afford to be stupidly wasteful and just malloc a new string every
+ * time we get here.
+ *
+ * DEPRECATED: Remove this once we have a new bootstrapping compiler
+ */
+static C_char *dequalified_symbol_string(C_char *str, int *len)
+{
+  C_char *deq_str;
+  int prefix = (int)str[0];
+
+  if (prefix >= 31) return str; /* namespace-max-id-len */
+  if (prefix == 0) return str; /* keyword (TODO: change this too) */
+
+  deq_str = malloc(*len+3);
+  if (deq_str == NULL) {
+    horror(C_text("cannot dequalify string - out of memory"));
+  }
+
+  deq_str[0] = '#';
+  deq_str[1] = '#';
+  memcpy(deq_str + 2, str + 1, prefix);
+  deq_str[prefix + 2] = '#';
+  memcpy(deq_str + prefix + 3, str + 1 + prefix, *len - prefix - 1);
+  deq_str[*len+2] = '\0'; /* Not always part of original str, but if it is, we must add it */
+  *len += 2;
+  if(debug_mode) {
+    C_dbg(C_text("debug"), C_text("Dequalified [%o]%.*s into %s\n"), str[0], len-3, str+1, deq_str);
+  }
+  return deq_str;
+}
+
 
 /* Startup code: */
 
@@ -1108,12 +1147,12 @@ void initialize_symbol_table(void)
   for(i = 0; i < symbol_table->size; symbol_table->table[ i++ ] = C_SCHEME_END_OF_LIST);
 
   /* Obtain reference to hooks for later: */
-  core_provided_symbol = C_intern2(C_heaptop, C_text("\004coreprovided"));
-  interrupt_hook_symbol = C_intern2(C_heaptop, C_text("\003sysinterrupt-hook"));
-  error_hook_symbol = C_intern2(C_heaptop, C_text("\003syserror-hook"));
-  callback_continuation_stack_symbol = C_intern3(C_heaptop, C_text("\003syscallback-continuation-stack"), C_SCHEME_END_OF_LIST);
-  pending_finalizers_symbol = C_intern2(C_heaptop, C_text("\003syspending-finalizers"));
-  current_thread_symbol = C_intern3(C_heaptop, C_text("\003syscurrent-thread"), C_SCHEME_FALSE);
+  core_provided_symbol = C_intern2(C_heaptop, C_text("##core#provided"));
+  interrupt_hook_symbol = C_intern2(C_heaptop, C_text("##sys#interrupt-hook"));
+  error_hook_symbol = C_intern2(C_heaptop, C_text("##sys#error-hook"));
+  callback_continuation_stack_symbol = C_intern3(C_heaptop, C_text("##sys#callback-continuation-stack"), C_SCHEME_END_OF_LIST);
+  pending_finalizers_symbol = C_intern2(C_heaptop, C_text("##sys#pending-finalizers"));
+  current_thread_symbol = C_intern3(C_heaptop, C_text("##sys#current-thread"), C_SCHEME_FALSE);
 }
 
 
@@ -2278,6 +2317,8 @@ C_regparm C_word C_fcall C_intern_in(C_word **ptr, int len, C_char *str, C_SYMBO
   int key;
   C_word s;
 
+  str = dequalified_symbol_string(str, &len);
+
   if(stable == NULL) stable = symbol_table;
 
   key = hash_string(len, str, stable->size, stable->rand, 0);
@@ -2299,6 +2340,8 @@ C_regparm C_word C_fcall C_h_intern_in(C_word *slot, int len, C_char *str, C_SYM
   int key;
   C_word s;
 
+  str = dequalified_symbol_string(str, &len);
+
   if(stable == NULL) stable = symbol_table;
 
   key = hash_string(len, str, stable->size, stable->rand, 0);
@@ -2322,6 +2365,8 @@ C_regparm C_word C_fcall C_h_intern_in(C_word *slot, int len, C_char *str, C_SYM
 C_regparm C_word C_fcall intern0(C_char *str)
 {
   int len = C_strlen(str);
+  str = dequalified_symbol_string(str, &len);
+
   int key = hash_string(len, str, symbol_table->size, symbol_table->rand, 0);
   C_word s;
 
@@ -2335,10 +2380,11 @@ C_regparm C_word C_fcall C_lookup_symbol(C_word sym)
   int key;
   C_word str = C_block_item(sym, 1);
   int len = C_header_size(str);
+  C_char *the_str = dequalified_symbol_string(C_c_string(str), &len);
 
-  key = hash_string(len, C_c_string(str), symbol_table->size, symbol_table->rand, 0);
+  key = hash_string(len, the_str, symbol_table->size, symbol_table->rand, 0);
 
-  return lookup(key, len, C_c_string(str), symbol_table);
+  return lookup(key, len, the_str, symbol_table);
 }
 
 
@@ -5866,7 +5912,7 @@ void C_ccall C_signum(C_word c, C_word *av)
   } else if (C_truep(C_bignump(x))) {
     C_kontinue(k, C_bignum_negativep(x) ? C_fix(-1) : C_fix(1));
   } else {
-    try_extended_number("\003sysextended-signum", 2, k, x);
+    try_extended_number("##sys#extended-signum", 2, k, x);
   }
 }
 
@@ -9948,6 +9994,7 @@ void C_ccall C_string_to_symbol(C_word c, C_word *av)
     
   len = C_header_size(string);
   name = (C_char *)C_data_pointer(string);
+  name = dequalified_symbol_string(name, &len);
   key = hash_string(len, name, symbol_table->size, symbol_table->rand, 0);
 
   if(!C_truep(s = lookup(key, len, name, symbol_table))) 
@@ -10412,7 +10459,7 @@ void C_ccall C_number_to_string(C_word c, C_word *av)
     C_integer_to_string(c, av); /* reuse av */
   } else {
     C_word k = av[ 1 ];
-    try_extended_number("\003sysextended-number->string", 3, k, num, radix);
+    try_extended_number("##sys#extended-number->string", 3, k, num, radix);
   }
 }
 
@@ -10533,7 +10580,7 @@ void C_ccall C_integer_to_string(C_word c, C_word *av)
     if (len > C_RECURSIVE_TO_STRING_THRESHOLD &&
         /* The power of two fast path is much faster than recursion */
         ((C_uword)1 << radix_shift) != radix) {
-      try_extended_number("\003sysinteger->string/recursive",
+      try_extended_number("##sys#integer->string/recursive",
                           4, k, num, C_fix(radix), C_fix(len));
     } else {
       C_word kab[C_SIZEOF_CLOSURE(4)], *ka = kab, kav[6];
Trap