~ chicken-core (chicken-5) f45780c647336fd0bfa6e95c8a347b7c5bdb0233
commit f45780c647336fd0bfa6e95c8a347b7c5bdb0233 Author: felix <felix@call-with-current-continuation.org> AuthorDate: Wed Jun 7 20:20:08 2023 +0200 Commit: Peter Bex <peter@more-magic.net> CommitDate: Sat Jun 24 15:35:44 2023 +0200 Allow string and character literals in SRFI-4 vector literals Signed-off-by: Peter Bex <peter@more-magic.net> diff --git a/NEWS b/NEWS index 9f519930..db9e7b5f 100644 --- a/NEWS +++ b/NEWS @@ -19,6 +19,8 @@ "chicken.flonum" module (suggested by Christian Himpe). - The `process-execute` procedure now sets argv[0] to the unmodified filename. Previously, the directory part would be stripped. + - Added support for embedded strings and characters in SRFI-4 vector + literals. - Tools - The -R option for csi and csc now accepts list-notation like diff --git a/manual/Module srfi-4 b/manual/Module srfi-4 index 9eb00002..2f72ac0b 100644 --- a/manual/Module srfi-4 +++ b/manual/Module srfi-4 @@ -191,6 +191,23 @@ will set {{x}} to the object {{#u8(1 2 3)}}. Since CHICKEN 4.9.0, literal homoge `(,x #u8(1 2)) ; legal `#u8(1 ,x 2) ; illegal +Elements may also be characters or strings, in that case they are interpreted +as a sequence of numerical character codes. For example, + + '#u8(#\x7f "EL" #\F 2 1) + +is equivalent to + + '#u8(#\x7f #\x45 #\x4c #\x46 2 1) + +Character literals inside numeric vectors expand into the UTF-8 sequence of +the characters they represent, for strings the contained characters +are interpreted in whatever encoding is used for the text file or stream +in which the literal appears. + +Note that {{#u8"..."}} can be used as an abbreviation for the special case +{{#u8("...")}}. + === Predicates <procedure>(u8vector? OBJ)</procedure><br> diff --git a/srfi-4.scm b/srfi-4.scm index e8bddec5..8b990779 100644 --- a/srfi-4.scm +++ b/srfi-4.scm @@ -434,7 +434,7 @@ EOF ((##core#inline "C_eqp" p '()) v) (if (and (##core#inline "C_blockp" p) (##core#inline "C_pairp" p)) (,set v i (##core#inline "C_slot" p 0)) - (##sys#error-not-a-proper-list lst) ) ) ) ))))))) + (##sys#error-not-a-proper-list lst ',name) ) ) ) ))))))) (list->NNNvector u8vector) (list->NNNvector s8vector) @@ -612,6 +612,28 @@ EOF ;;; Read syntax: +(define (canonicalize-number-list! lst1) + (let loop ((lst lst1) (prev #f)) + (if (and (##core#inline "C_blockp" lst) + (##core#inline "C_pairp" lst)) + (let retry ((x (##sys#slot lst 0))) + (cond ((char? x) (retry (##sys#char->utf8-string x))) + ((string? x) + (if (eq? x "") + (loop (##sys#slot lst 1) prev) + (let loop2 ((ns (string->list x)) (prev prev)) + (let ((n (cons (char->integer (##sys#slot ns 0)) + (##sys#slot lst 1)))) + (if prev + (##sys#setslot prev 1 n) + (set! lst1 n)) + (let ((ns2 (##sys#slot ns 1))) + (if (null? ns2) + (loop (##sys#slot lst 1) n) + (loop2 (##sys#slot ns 1) n))))))) + (else (loop (##sys#slot lst 1) lst)))) + lst1))) + (set! ##sys#user-read-hook (let ([old-hook ##sys#user-read-hook] [read read] @@ -629,9 +651,15 @@ EOF (if (memq char '(#\u #\s #\f #\U #\S #\F)) (let* ([x (read port)] [tag (and (symbol? x) x)] ) - (cond [(or (eq? tag 'f) (eq? tag 'F)) #f] - [(memq tag consers) => (lambda (c) ((##sys#slot (##sys#slot c 1) 0) (read port)))] - [else (##sys#read-error port "illegal bytevector syntax" tag)] ) ) + (cond ((or (eq? tag 'f) (eq? tag 'F)) #f) + ((memq tag consers) => + (lambda (c) + (let ((val (read port))) + (if (string? val) + (set! val (map char->integer (string->list val))) + (set! val (canonicalize-number-list! val))) + ((##sys#slot (##sys#slot c 1) 0) val)))) + (else (##sys#read-error port "illegal bytevector syntax" tag)) ) ) (old-hook char port) ) ) ) ) diff --git a/tests/srfi-4-tests.scm b/tests/srfi-4-tests.scm index 10f3ce7c..7e0548cb 100644 --- a/tests/srfi-4-tests.scm +++ b/tests/srfi-4-tests.scm @@ -157,3 +157,17 @@ (assert (handle-exceptions exn #t (make-f64vector most-positive-fixnum) #f)) + +;; test special read-syntax + +(let ((cases '(("#u8(1 2 #\\A)" #u8(1 2 65)) + ("#u8(\"abc\")" #u8(97 98 99)) + ("#u8\"abc\"" #u8(97 98 99)) + ("#s8\"\"" #s8()) + ("#u64(\" \" #\\! 1 \"A\")" #u64(32 33 1 65)) + ("#u64(\" \" #\\! \"A\" 1)" #u64(32 33 65 1))))) + (do ((cs cases (cdr cs))) + ((null? cs)) + (let ((x (with-input-from-string (caar cs) read))) + (unless (equal? x (cadar cs)) + (error "failed" x (cadar cs))))))Trap