~ chicken-core (chicken-5) e18379d79abf0b76d88be5fbd45187b6ff500c15
commit e18379d79abf0b76d88be5fbd45187b6ff500c15 Author: LemonBoy <thatlemon@gmail.com> AuthorDate: Thu Nov 9 13:29:08 2017 +0100 Commit: Evan Hanson <evhan@foldling.org> CommitDate: Sun Nov 12 09:40:38 2017 +1300 Fix an error in unicode-range->utf8-pattern The sequence generated for a utf8 character class contained an unintended trailing '(), causing the code to fail when `sre-length-ranges' is called. Reported by Chunyang Xu at CHICKEN-users. Signed-off-by: Peter Bex <peter@more-magic.net> Signed-off-by: Evan Hanson <evhan@foldling.org> diff --git a/NEWS b/NEWS index 212f40b2..3b36ebde 100644 --- a/NEWS +++ b/NEWS @@ -19,6 +19,8 @@ on s8vectors (thanks to Kristian Lein-Mathisen). - Large literals no longer crash with "invalid encoded numeric literal" on mingw-64 (#1344, thanks to Lemonboy). + - Unit irregex: Fix bug that prevented multibyte UTF-8 character sets + from being matched correctly (Thanks to Lemonboy and Chunyang Xu). - Runtime system: - The profiler no longer uses malloc from a signal handler which may diff --git a/irregex-core.scm b/irregex-core.scm index 7ac043d3..ba6d1f72 100644 --- a/irregex-core.scm +++ b/irregex-core.scm @@ -1407,12 +1407,11 @@ (unicode-range-up-to hi-ls))) (let lp ((lo-ls lo-ls) (hi-ls hi-ls)) (cond - ((null? lo-ls) - '()) ((= (car lo-ls) (car hi-ls)) (sre-sequence - (list (integer->char (car lo-ls)) - (lp (cdr lo-ls) (cdr hi-ls))))) + (cons (integer->char (car lo-ls)) + (if (null? 
(cdr lo-ls)) '() + (cons (lp (cdr lo-ls) (cdr hi-ls)) '()))))) ((= (+ (car lo-ls) 1) (car hi-ls)) (sre-alternate (list (unicode-range-up-from lo-ls) (unicode-range-up-to hi-ls)))) diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm index 1a460549..9a5402c4 100644 --- a/tests/test-irregex.scm +++ b/tests/test-irregex.scm @@ -538,5 +538,7 @@ (test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<ひらがな>"))) (test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>"))) +(test-assert (not (irregex-search (irregex "[一二]" 'utf8 #t) "三四"))) + (test-end)