~ chicken-core (master) /tests/unicode-tests.scm


  1;;; unicode tests, taken from Chibi
  2
  3(import (chicken port) (chicken sort))
  4(import (chicken string) (chicken io))
  5(import (chicken bytevector))
  6(import (only (scheme base) write-string))
  7
  8(include "test.scm")
  9                           
 10(test-begin "scheme")
 11
 12(test-equal #\Р (string-ref "Русский" 0))
 13(test-equal #\и (string-ref "Русский" 5))
 14(test-equal #\й (string-ref "Русский" 6))
 15
 16(test-equal 7 (string-length "Русский"))
 17
 18(test-equal #\日 (string-ref "日本語" 0))
 19(test-equal #\本 (string-ref "日本語" 1))
 20(test-equal #\語 (string-ref "日本語" 2))
 21
 22(test-equal 3 (string-length "日本語"))
 23
 24(test-equal '(#\日 #\本 #\語) (string->list "日本語"))
 25(test-equal "日本語" (list->string '(#\日 #\本 #\語)))
 26
 27(test-equal "日本" (substring "日本語" 0 2))
 28(test-equal "本語" (substring "日本語" 1 3))
 29
 30(test-equal "日-語"
 31      (let ((s (substring "日本語" 0 3)))
 32        (string-set! s 1 #\-)
 33        s))
 34
 35(test-equal "日本人"
 36      (let ((s (substring "日本語" 0 3)))
 37        (string-set! s 2 #\人)
 38        s))
 39
 40(test-equal "字字字" (make-string 3 #\字))
 41
 42(test-equal "字字字"
 43      (let ((s (make-string 3)))
 44        (string-fill! s #\字)
 45        s))
 46
 47; tests from the utf8 egg:
 48
 49(test-equal 2 (string-length "漢字"))
 50
 51(test-equal 28450 (char->integer (string-ref "漢字" 0)))
 52
 53(define str (string-copy "漢字"))
 54
 55(test-equal "赤字" (begin (string-set! str 0 (string-ref "赤" 0)) str))
 56
 57(test-equal "赤外" (begin (string-set! str 1 (string-ref "外" 0)) str))
 58
 59(test-equal "赤x" (begin (string-set! str 1 #\x) str))
 60
 61(test-equal "赤々" (begin (string-set! str 1 (string-ref "々" 0)) str))
 62
 63(test-equal "文字列" (substring "文字列" 0))
 64(test-equal "字列" (substring "文字列" 1))
 65(test-equal "列" (substring "文字列" 2))
 66(test-equal "文" (substring "文字列" 0 1))
 67(test-equal "字" (substring "文字列" 1 2))
 68(test-equal "文字" (substring "文字列" 0 2))
 69
 70(define *string* "文字列")
 71(define *list* '("文" "字" "列"))
 72(define *chars* '(25991 23383 21015))
 73
 74(test-equal *chars* (map char->integer (string->list "文字列")))
 75
 76(test-equal *list* (map string (map integer->char *chars*)))
 77
 78(test-equal *string* (list->string (map integer->char '(25991 23383 21015))))
 79
 80(test-equal "列列列" (make-string 3 (string-ref "列" 0)))
 81
 82(test-equal "文文文" (let ((s (string-copy "abc"))) (string-fill! s (string-ref "文" 0)) s))
 83
 84(test-equal (string-ref "ハ" 0) (with-input-from-string "全角ハンカク"
 85                           (lambda () (read-char) (read-char) (read-char))))
 86
 87(test-equal "個々" (with-output-to-string
 88              (lambda ()
 89                (write-char (string-ref "個" 0))
 90                (write-char (string-ref "々" 0)))))
 91
 92;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 93;; library
 94
 95(test-equal "出力改行\n" (with-output-to-string
 96                   (lambda () (print "出" (string-ref "力" 0) "改行"))))
 97
 98(test-equal "出力" (with-output-to-string
 99              (lambda () (print* "出" (string-ref "力" 0) ""))))
100
101(test-equal "逆リスト→文字列" (reverse-list->string
102                     (map (cut string-ref <> 0)
103                          '("列" "字" "文" "→" "ト" "ス" "リ" "逆"))))
104
105(test-error (utf8->string #u8(255 1 2)))
106(test-assert (utf8->string #u8(255 1 2) #f))
107(test-equal (string-length (utf8->string #u8(255 1 2) #f)) 3)
108
109;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
110;; extras
111
112(test-equal "这是" (with-input-from-string "这是中文" (cut read-string 2)))
113
114(define s "abcdef")
115(call-with-input-string "这是中文" (cut read-string! 2 s <> 2))
116(test-equal "ab这是ef" s)
117       
118(define s "这是中文")
119(call-with-input-string "abcd" (cut read-string! 1 s <> 2))
120(test-equal "这是a文" s)
121       
122(test-equal "这是" (with-output-to-string (cut write-string "这是中文" (current-output-port) 0 2)))
123
124(test-equal "我爱她" (conc (with-input-from-string "我爱你"
125                      (cut read-token (lambda (c)
126                                        (memv c (map (cut string-ref <> 0)
127                                                     '("爱" "您" "我"))))))
128                        "她"))
129
130(test-equal '("第一" "第二" "第三") (string-chop "第一第二第三" 2))
131
132(test-equal '("第一" "第二" "第三" "…") (string-chop "第一第二第三…" 2))
133
134(test-equal '("a" "bc" "第" "f几") (string-split "a,bc、第,f几" ",、"))
135
136(test-equal "THE QUICK BROWN FOX JUMPED OVER THE LAZY SLEEPING DOG"
137    (string-translate "the quick brown fox jumped over the lazy sleeping dog"
138                      "abcdefghijklmnopqrstuvwxyz"
139                      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"))
140(test-equal ":foo:bar:baz" (string-translate "/foo/bar/baz" "/" ":"))
141(test-equal "你爱我" (string-translate "我爱你" "我你" "你我"))
142(test-equal "你爱我" (string-translate "我爱你" '(#\我 #\你) '(#\你 #\我)))
143(test-equal "我你" (string-translate "我爱你" "爱"))
144(test-equal "我你" (string-translate "我爱你" #\爱))
145
146(test-assert (substring=? "日本語" "日本語"))
147(test-assert (substring=? "日本語" "日本"))
148(test-assert (substring=? "日本" "日本語"))
149(test-assert (substring=? "日本語" "本語" 1))
150(test-assert (substring=? "日本語" "本" 1 0 1))
151(test-assert (substring=? "听说上海的东西很贵" "上海的东西很便宜" 2 0 5))
152
153(test-equal 2 (substring-index "上海" "听说上海的东西很贵"))
154
155;; case folding
156
157(test-assert (string-ci=? "abc" "ABC"))
158(test-assert (string-ci=? "Xῌηιx" "xηιῌX"))
159(test-assert (string-ci=? "αβξ" "αβξ"))
160(test-assert (string-ci=? "αβξ" "ΑΒΞ"))
161
162(test-end)
163
164(test-exit)
165
Trap