~ chicken-core (master) 2b0ba37a7110ff8a78b59a04bda1018328a315d0
commit 2b0ba37a7110ff8a78b59a04bda1018328a315d0
Author: felix <felix@call-with-current-continuation.org>
AuthorDate: Thu Nov 20 14:04:10 2025 +0100
Commit: felix <felix@call-with-current-continuation.org>
CommitDate: Thu Nov 20 14:04:10 2025 +0100
added string-up/downcase, moved foldcase primitives
diff --git a/chicken.base.import.scm b/chicken.base.import.scm
index 20af60d2..0dec96d0 100644
--- a/chicken.base.import.scm
+++ b/chicken.base.import.scm
@@ -36,7 +36,6 @@
(butlast . chicken.base#butlast)
(case-sensitive . chicken.base#case-sensitive)
(char-name . chicken.base#char-name)
- (char-foldcase . chicken.base#char-foldcase)
(chop . chicken.base#chop)
(complement . chicken.base#complement)
(compose . chicken.base#compose)
@@ -94,7 +93,6 @@
(signum . chicken.base#signum)
(sleep . chicken.base#sleep)
(string->uninterned-symbol . chicken.base#string->uninterned-symbol)
- (string-foldcase . chicken.base#string-foldcase)
(sub1 . chicken.base#sub1)
(subvector . chicken.base#subvector)
(symbol-append . chicken.base#symbol-append)
diff --git a/chicken.h b/chicken.h
index 6bf400fc..f6403c40 100644
--- a/chicken.h
+++ b/chicken.h
@@ -1947,6 +1947,8 @@ C_fctexport C_word C_latin_to_utf(C_word from, C_word to, C_word start, C_word l
C_fctexport C_word C_utf_to_latin(C_word from, C_word to, C_word start, C_word len) C_regparm;
C_fctexport C_word C_utf_char_foldcase(C_word c) C_regparm;
C_fctexport C_word C_utf_string_foldcase(C_word from, C_word to, C_word len) C_regparm;
+C_fctexport C_word C_utf_string_downcase(C_word from, C_word to, C_word len) C_regparm;
+C_fctexport C_word C_utf_string_upcase(C_word from, C_word to, C_word len) C_regparm;
#ifdef C_WCHAR_FILENAMES
C_fctexport C_WCHAR *C_utf16(C_word bv, int cont) C_regparm;
C_fctexport C_char *C_utf8(C_WCHAR *str) C_regparm;
diff --git a/library.scm b/library.scm
index 3e1b584a..e0d2775c 100644
--- a/library.scm
+++ b/library.scm
@@ -267,6 +267,8 @@ EOF
string->vector vector->string textual-port? binary-port?
input-port-open? output-port-open? floor/ truncate/
exact inexact floor-remainder floor-quotient close-port
+
+ char-foldcase string-foldcase string-upcase string-downcase
;; The following procedures are overwritten in eval.scm:
eval interaction-environment null-environment
@@ -670,6 +672,36 @@ EOF
(and (not (eq? n 0))
(##core#inline "C_fixnum_difference" n 1))))
+;; case folding and conversion
+
+(define (char-foldcase c)
+ (##sys#check-char c 'char-foldcase)
+ (##core#inline "C_utf_char_foldcase" c))
+
+(define (string-foldcase str)
+ (##sys#check-string str 'string-foldcase)
+ (let* ((bv (##sys#slot str 0))
+ (n (##core#inline "C_fixnum_difference" (##sys#size bv) 1))
+ (buf (##sys#make-bytevector (##core#inline "C_fixnum_times" n 2)))
+ (len (##core#inline "C_utf_string_foldcase" bv buf n)))
+ (##sys#buffer->string buf 0 len)))
+
+(define (string-downcase str)
+ (##sys#check-string str 'string-downcase)
+ (let* ((bv (##sys#slot str 0))
+ (n (##core#inline "C_fixnum_difference" (##sys#size bv) 1))
+ (buf (##sys#make-bytevector (##core#inline "C_fixnum_times" n 2)))
+ (len (##core#inline "C_utf_string_downcase" bv buf n)))
+ (##sys#buffer->string buf 0 len)))
+
+(define (string-upcase str)
+ (##sys#check-string str 'string-upcase)
+ (let* ((bv (##sys#slot str 0))
+ (n (##core#inline "C_fixnum_difference" (##sys#size bv) 1))
+ (buf (##sys#make-bytevector (##core#inline "C_fixnum_times" n 2)))
+ (len (##core#inline "C_utf_string_upcase" bv buf n)))
+ (##sys#buffer->string buf 0 len)))
+
;;; Procedures:
(define (procedure? x) (##core#inline "C_i_closurep" x))
@@ -798,8 +830,6 @@ EOF
compress flatten intersperse join list-of? tail? constantly
complement compose conjoin disjoin each flip identity o
- char-foldcase string-foldcase
-
case-sensitive keyword-style parentheses-synonyms symbol-escape
on-exit exit exit-handler implicit-exit-handler emergency-exit
@@ -1080,21 +1110,6 @@ EOF
(else (loop (##sys#slot blst 1) (##sys#slot lst 1))) ) ) ) ) )
-;; case folding
-
-(define (char-foldcase c)
- (##sys#check-char c 'char-foldcase)
- (##core#inline "C_utf_char_foldcase" c))
-
-(define (string-foldcase str)
- (##sys#check-string str 'string-foldcase)
- (let* ((bv (##sys#slot str 0))
- (n (##core#inline "C_fixnum_difference" (##sys#size bv) 1))
- (buf (##sys#make-bytevector (##core#inline "C_fixnum_times" n 2)))
- (len (##core#inline "C_utf_string_foldcase" bv buf n)))
- (##sys#buffer->string buf 0 len)))
-
-
;;; Alists:
(define (alist-update! x y lst #!optional (cmp eqv?))
@@ -4926,7 +4941,10 @@ EOF
(else
(read-unreserved-char-0 port)
(loop (##sys#peek-char-0 port)
- (cons (if csp c (char-foldcase c)) lst) ) ) ) ) )
+ (cons (if csp
+ c
+ (##core#inline "C_utf_char_foldcase" c) )
+ lst) ) ) ) ) )
(define (r-digits)
(let loop ((c (##sys#peek-char-0 port)) (lst '()))
@@ -4989,7 +5007,10 @@ EOF
(loop (cons c lst) #f qtd))))
(else
(loop
- (cons (if csp c (char-foldcase c)) lst)
+ (cons (if csp
+ c
+ (##core#inline "C_utf_char_foldcase" c))
+ lst)
#f qtd)))))))))
(define (r-char)
diff --git a/manual/Module (chicken base) b/manual/Module (chicken base)
index b5e0dbe3..b1442c5b 100644
--- a/manual/Module (chicken base)
+++ b/manual/Module (chicken base)
@@ -611,23 +611,6 @@ A single value version of {{compose}} (slightly faster). {{(o)}} is equivalent
to {{identity}}.
-=== UNICODE case folding
-
-==== char-foldcase
-
-<procedure>(char-foldcase CHAR)</procedure>
-
-Performs simple UNICODE case folding to {{CHAR}}, language-specific mappings
-are not used.
-
-==== string-foldcase
-
-<procedure>(string-foldcase STRING)</procedure>
-
-Performs UNICODE case folding to {{STRING}}, language-specific mappings
-are not used. The resulting string may be longer than the original string.
-
-
=== User-defined named characters
==== char-name
diff --git a/types.db b/types.db
index 534fde28..f83f705f 100644
--- a/types.db
+++ b/types.db
@@ -653,6 +653,12 @@
(scheme#string-fill! (#(procedure #:enforce) scheme#string-fill! (string char #!optional fixnum fixnum) string))
(scheme#string (#(procedure #:clean #:enforce) scheme#string (#!rest char) string))
+(scheme#char-foldcase (#(procedure #:clean #:enforce #:foldable) scheme#char-foldcase (char) char)
+ ((char) (##core#inline "C_utf_char_foldcase" #(1))))
+(scheme#string-foldcase (#(procedure #:clean #:enforce) scheme#string-foldcase (string) string))
+(scheme#string-upcase (#(procedure #:clean #:enforce) scheme#string-upcase (string) string))
+(scheme#string-downcase (#(procedure #:clean #:enforce) scheme#string-downcase (string) string))
+
(scheme#vector? (#(procedure #:pure #:predicate vector) scheme#vector? (*) boolean))
;; special-cased (see scrutinizer.scm)
@@ -1108,10 +1114,6 @@
(chicken.base#join (#(procedure #:clean #:enforce) chicken.base#join ((list-of list) #!optional list) list))
(chicken.base#list-of? (#(procedure #:clean #:enforce) chicken.base#list-of? ((procedure (*) *)) (procedure (list) boolean)))
-(chicken.base#char-foldcase (#(procedure #:clean #:enforce #:foldable) chicken.base#char-foldcase (char) char)
- ((char) (##core#inline "C_utf_char_foldcase" #(1))))
-(chicken.base#string-foldcase (#(procedure #:clean #:enforce) chicken.base#string-foldcase (string) string))
-
(chicken.base#o (#(procedure #:clean #:enforce) chicken.base#o (#!rest (procedure (*) *)) (procedure (*) *)))
(chicken.base#rassoc
diff --git a/utf.c b/utf.c
index a4a42c5d..aaf7578a 100644
--- a/utf.c
+++ b/utf.c
@@ -3712,6 +3712,40 @@ C_regparm C_word C_utf_string_foldcase(C_word from, C_word to, C_word len)
return C_fix(pt - pt0);
}
+C_regparm C_word C_utf_string_downcase(C_word from, C_word to, C_word len)
+{
+ C_u32 c;
+ int e;
+ C_char *pf = C_c_string(from), *pf2;
+ C_char *pt = C_c_string(to), *pt0 = pt;
+ int count = C_unfix(len);
+ while(count > 0) {
+ pf2 = utf8_decode(pf, &c, &e);
+ if(!e) c = C_utf_char_downcase(c);
+ pt = utf8_encode(c, pt);
+ count -= pf2 - pf;
+ pf = pf2;
+ }
+ return C_fix(pt - pt0);
+}
+
+C_regparm C_word C_utf_string_upcase(C_word from, C_word to, C_word len)
+{
+ C_u32 c;
+ int e;
+ C_char *pf = C_c_string(from), *pf2;
+ C_char *pt = C_c_string(to), *pt0 = pt;
+ int count = C_unfix(len);
+ while(count > 0) {
+ pf2 = utf8_decode(pf, &c, &e);
+ if(!e) c = C_utf_char_upcase(c);
+ pt = utf8_encode(c, pt);
+ count -= pf2 - pf;
+ pf = pf2;
+ }
+ return C_fix(pt - pt0);
+}
+
#if defined(_WIN32) && !defined(__CYGWIN__)
#define C_WCHAR_FNBUF_SIZE 2048
static C_WCHAR fnbuf[ C_WCHAR_FNBUF_SIZE ], *pfnbuf;
Trap