~ chicken-core (chicken-5) f45780c647336fd0bfa6e95c8a347b7c5bdb0233
commit f45780c647336fd0bfa6e95c8a347b7c5bdb0233
Author: felix <felix@call-with-current-continuation.org>
AuthorDate: Wed Jun 7 20:20:08 2023 +0200
Commit: Peter Bex <peter@more-magic.net>
CommitDate: Sat Jun 24 15:35:44 2023 +0200
Allow string and character literals in SRFI-4 vector literals
Signed-off-by: Peter Bex <peter@more-magic.net>
diff --git a/NEWS b/NEWS
index 9f519930..db9e7b5f 100644
--- a/NEWS
+++ b/NEWS
@@ -19,6 +19,8 @@
"chicken.flonum" module (suggested by Christian Himpe).
- The `process-execute` procedure now sets argv[0] to the unmodified
filename. Previously, the directory part would be stripped.
+ - Added support for embedded strings and characters in SRFI-4 vector
+ literals.
- Tools
- The -R option for csi and csc now accepts list-notation like
diff --git a/manual/Module srfi-4 b/manual/Module srfi-4
index 9eb00002..2f72ac0b 100644
--- a/manual/Module srfi-4
+++ b/manual/Module srfi-4
@@ -191,6 +191,23 @@ will set {{x}} to the object {{#u8(1 2 3)}}. Since CHICKEN 4.9.0, literal homoge
`(,x #u8(1 2)) ; legal
`#u8(1 ,x 2) ; illegal
+Elements may also be characters or strings, in that case they are interpreted
+as a sequence of numerical character codes. For example,
+
+ '#u8(#\x7f "EL" #\F 2 1)
+
+is equivalent to
+
+ '#u8(#\x7f #\x45 #\x4c #\x46 2 1)
+
+Character literals inside numeric vectors expand into the UTF-8 sequence of
+the characters they represent, for strings the contained characters
+are interpreted in whatever encoding is used for the text file or stream
+in which the literal appears.
+
+Note that {{#u8"..."}} can be used as an abbreviation for the special case
+{{#u8("...")}}.
+
=== Predicates
<procedure>(u8vector? OBJ)</procedure><br>
diff --git a/srfi-4.scm b/srfi-4.scm
index e8bddec5..8b990779 100644
--- a/srfi-4.scm
+++ b/srfi-4.scm
@@ -434,7 +434,7 @@ EOF
((##core#inline "C_eqp" p '()) v)
(if (and (##core#inline "C_blockp" p) (##core#inline "C_pairp" p))
(,set v i (##core#inline "C_slot" p 0))
- (##sys#error-not-a-proper-list lst) ) ) ) )))))))
+ (##sys#error-not-a-proper-list lst ',name) ) ) ) )))))))
(list->NNNvector u8vector)
(list->NNNvector s8vector)
@@ -612,6 +612,28 @@ EOF
;;; Read syntax:
+(define (canonicalize-number-list! lst1)
+ (let loop ((lst lst1) (prev #f))
+ (if (and (##core#inline "C_blockp" lst)
+ (##core#inline "C_pairp" lst))
+ (let retry ((x (##sys#slot lst 0)))
+ (cond ((char? x) (retry (##sys#char->utf8-string x)))
+ ((string? x)
+ (if (eq? x "")
+ (loop (##sys#slot lst 1) prev)
+ (let loop2 ((ns (string->list x)) (prev prev))
+ (let ((n (cons (char->integer (##sys#slot ns 0))
+ (##sys#slot lst 1))))
+ (if prev
+ (##sys#setslot prev 1 n)
+ (set! lst1 n))
+ (let ((ns2 (##sys#slot ns 1)))
+ (if (null? ns2)
+ (loop (##sys#slot lst 1) n)
+ (loop2 (##sys#slot ns 1) n)))))))
+ (else (loop (##sys#slot lst 1) lst))))
+ lst1)))
+
(set! ##sys#user-read-hook
(let ([old-hook ##sys#user-read-hook]
[read read]
@@ -629,9 +651,15 @@ EOF
(if (memq char '(#\u #\s #\f #\U #\S #\F))
(let* ([x (read port)]
[tag (and (symbol? x) x)] )
- (cond [(or (eq? tag 'f) (eq? tag 'F)) #f]
- [(memq tag consers) => (lambda (c) ((##sys#slot (##sys#slot c 1) 0) (read port)))]
- [else (##sys#read-error port "illegal bytevector syntax" tag)] ) )
+ (cond ((or (eq? tag 'f) (eq? tag 'F)) #f)
+ ((memq tag consers) =>
+ (lambda (c)
+ (let ((val (read port)))
+ (if (string? val)
+ (set! val (map char->integer (string->list val)))
+ (set! val (canonicalize-number-list! val)))
+ ((##sys#slot (##sys#slot c 1) 0) val))))
+ (else (##sys#read-error port "illegal bytevector syntax" tag)) ) )
(old-hook char port) ) ) ) )
diff --git a/tests/srfi-4-tests.scm b/tests/srfi-4-tests.scm
index 10f3ce7c..7e0548cb 100644
--- a/tests/srfi-4-tests.scm
+++ b/tests/srfi-4-tests.scm
@@ -157,3 +157,17 @@
(assert
(handle-exceptions exn #t
(make-f64vector most-positive-fixnum) #f))
+
+;; test special read-syntax
+
+(let ((cases '(("#u8(1 2 #\\A)" #u8(1 2 65))
+ ("#u8(\"abc\")" #u8(97 98 99))
+ ("#u8\"abc\"" #u8(97 98 99))
+ ("#s8\"\"" #s8())
+ ("#u64(\" \" #\\! 1 \"A\")" #u64(32 33 1 65))
+ ("#u64(\" \" #\\! \"A\" 1)" #u64(32 33 65 1)))))
+ (do ((cs cases (cdr cs)))
+ ((null? cs))
+ (let ((x (with-input-from-string (caar cs) read)))
+ (unless (equal? x (cadar cs))
+ (error "failed" x (cadar cs))))))
Trap