~ chicken-core (chicken-5) d0b3cceb40d0653e567b25b78b7d8b90eebc231b
commit d0b3cceb40d0653e567b25b78b7d8b90eebc231b Author: Peter Bex <peter.bex@xs4all.nl> AuthorDate: Fri Dec 9 10:48:49 2011 +0100 Commit: felix <felix@call-with-current-continuation.org> CommitDate: Sat Dec 10 13:57:09 2011 +0100 Apply upstream changesets ba70feace1dd and 78ba6b09e021 This fixes an infinite loop problem with irregex-fold when empty matches are found (reported by Manuel Serrano) and adds some extra tests for complemented UTF-8 character sets. Signed-off-by: felix <felix@call-with-current-continuation.org> diff --git a/irregex-core.scm b/irregex-core.scm index 982f57e4..54413bfe 100644 --- a/irregex-core.scm +++ b/irregex-core.scm @@ -1485,7 +1485,7 @@ (map (lambda (_) `(/ ,(integer->char #x80) ,(integer->char #xFF))) (zero-to (+ i lo-len)))))) - (zero-to (- (length hi-ls) lo-len 1))) + (zero-to (- (length hi-ls) (+ lo-len 1)))) (list (sre-sequence (cons `(/ ,(integer->char @@ -3752,10 +3752,13 @@ matches))) (if (not m) (finish i acc) - (let* ((end (%irregex-match-end-index m 0)) - (acc (kons i m acc))) - (irregex-reset-matches! matches) - (lp end acc)))))))) + (let ((end (%irregex-match-end-index m 0))) + (if (= end i) + ;; skip one char forward if we match the empty string + (lp (+ end 1) acc) + (let ((acc (kons i m acc))) + (irregex-reset-matches! matches) + (lp end acc)))))))))) (define (irregex-fold irx kons . args) (if (not (procedure? kons)) (%irregex-error 'irregex-fold "not a procedure" kons)) @@ -3777,11 +3780,16 @@ (let ((m (irregex-search/matches irx cnk start i matches))) (if (not m) (finish start i acc) - (let* ((acc (kons start i m acc)) - (end-src (%irregex-match-end-chunk m 0)) - (end-index (%irregex-match-end-index m 0))) - (irregex-reset-matches! matches) - (lp end-src end-index acc)))))))) + (let ((end-src (%irregex-match-end-chunk m 0)) + (end-index (%irregex-match-end-index m 0))) + (if (and (eq? end-src start) (= end-index i)) + (if (>= end-index ((chunker-get-end cnk) end-src )) + (let ((next ((chunker-get-next cnk) end-src))) + (lp next ((chunker-get-start cnk) next) acc)) + (lp end-src (+ end-index 1) acc)) + (let ((acc (kons start i m acc))) + (irregex-reset-matches! matches) + (lp end-src end-index acc)))))))))) (define (irregex-fold/chunked irx kons . args) (if (not (procedure? kons)) (%irregex-error 'irregex-fold/chunked "not a procedure" kons)) diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm index a06bc6bd..fd2cb97a 100644 --- a/tests/test-irregex.scm +++ b/tests/test-irregex.scm @@ -358,6 +358,11 @@ rope-chunker (rope "bob@test.com and fred@example.com") (lambda (src i s) (reverse s)))) + (test-equal '("poo poo ") + (irregex-fold '(* "poo ") + (lambda (i m s) (cons (irregex-match-substring m) s)) + '() + "poo poo platter")) ) @@ -499,5 +504,13 @@ (test-assert (not (irregex-search "(?u:<[あ-ん]*>)" "<ひらgがな>"))) (test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>"))) -(test-end)(test-exit) +(test-assert (irregex-search "(?u:<[^あ-ん]*>)" "<abc>")) +(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<あん>"))) +(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<ひらがな>"))) +(test-assert (irregex-search "(?u:<[^あ-ん語]*>)" "<abc>")) +(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<あん>"))) +(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<ひらがな>"))) +(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>"))) + +(test-end)Trap