~ chicken-core (chicken-5) d0b3cceb40d0653e567b25b78b7d8b90eebc231b
commit d0b3cceb40d0653e567b25b78b7d8b90eebc231b
Author: Peter Bex <peter.bex@xs4all.nl>
AuthorDate: Fri Dec 9 10:48:49 2011 +0100
Commit: felix <felix@call-with-current-continuation.org>
CommitDate: Sat Dec 10 13:57:09 2011 +0100
Apply upstream changesets ba70feace1dd and 78ba6b09e021
This fixes an infinite loop problem with irregex-fold when empty matches are found (reported by Manuel Serrano) and adds some extra tests for complemented UTF-8 character sets.
Signed-off-by: felix <felix@call-with-current-continuation.org>
diff --git a/irregex-core.scm b/irregex-core.scm
index 982f57e4..54413bfe 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -1485,7 +1485,7 @@
(map (lambda (_)
`(/ ,(integer->char #x80) ,(integer->char #xFF)))
(zero-to (+ i lo-len))))))
- (zero-to (- (length hi-ls) lo-len 1)))
+ (zero-to (- (length hi-ls) (+ lo-len 1))))
(list
(sre-sequence
(cons `(/ ,(integer->char
@@ -3752,10 +3752,13 @@
matches)))
(if (not m)
(finish i acc)
- (let* ((end (%irregex-match-end-index m 0))
- (acc (kons i m acc)))
- (irregex-reset-matches! matches)
- (lp end acc))))))))
+ (let ((end (%irregex-match-end-index m 0)))
+ (if (= end i)
+ ;; skip one char forward if we match the empty string
+ (lp (+ end 1) acc)
+ (let ((acc (kons i m acc)))
+ (irregex-reset-matches! matches)
+ (lp end acc))))))))))
(define (irregex-fold irx kons . args)
(if (not (procedure? kons)) (%irregex-error 'irregex-fold "not a procedure" kons))
@@ -3777,11 +3780,16 @@
(let ((m (irregex-search/matches irx cnk start i matches)))
(if (not m)
(finish start i acc)
- (let* ((acc (kons start i m acc))
- (end-src (%irregex-match-end-chunk m 0))
- (end-index (%irregex-match-end-index m 0)))
- (irregex-reset-matches! matches)
- (lp end-src end-index acc))))))))
+ (let ((end-src (%irregex-match-end-chunk m 0))
+ (end-index (%irregex-match-end-index m 0)))
+ (if (and (eq? end-src start) (= end-index i))
+ (if (>= end-index ((chunker-get-end cnk) end-src ))
+ (let ((next ((chunker-get-next cnk) end-src)))
+ (lp next ((chunker-get-start cnk) next) acc))
+ (lp end-src (+ end-index 1) acc))
+ (let ((acc (kons start i m acc)))
+ (irregex-reset-matches! matches)
+ (lp end-src end-index acc))))))))))
(define (irregex-fold/chunked irx kons . args)
(if (not (procedure? kons)) (%irregex-error 'irregex-fold/chunked "not a procedure" kons))
diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm
index a06bc6bd..fd2cb97a 100644
--- a/tests/test-irregex.scm
+++ b/tests/test-irregex.scm
@@ -358,6 +358,11 @@
rope-chunker
(rope "bob@test.com and fred@example.com")
(lambda (src i s) (reverse s))))
+ (test-equal '("poo poo ")
+ (irregex-fold '(* "poo ")
+ (lambda (i m s) (cons (irregex-match-substring m) s))
+ '()
+ "poo poo platter"))
)
@@ -499,5 +504,13 @@
(test-assert (not (irregex-search "(?u:<[あ-ん]*>)" "<ひらgがな>")))
(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>")))
-(test-end)(test-exit)
+(test-assert (irregex-search "(?u:<[^あ-ん]*>)" "<abc>"))
+(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<あん>")))
+(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<ひらがな>")))
+(test-assert (irregex-search "(?u:<[^あ-ん語]*>)" "<abc>"))
+(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<あん>")))
+(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<ひらがな>")))
+(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>")))
+
+(test-end)
Trap