~ chicken-core (chicken-5) d0b3cceb40d0653e567b25b78b7d8b90eebc231b


commit d0b3cceb40d0653e567b25b78b7d8b90eebc231b
Author:     Peter Bex <peter.bex@xs4all.nl>
AuthorDate: Fri Dec 9 10:48:49 2011 +0100
Commit:     felix <felix@call-with-current-continuation.org>
CommitDate: Sat Dec 10 13:57:09 2011 +0100

    Apply upstream changesets ba70feace1dd and 78ba6b09e021
    
    This fixes an infinite loop problem with irregex-fold when empty matches are found (reported by Manuel Serrano) and adds some extra tests for complemented UTF-8 character sets.
    
    Signed-off-by: felix <felix@call-with-current-continuation.org>

diff --git a/irregex-core.scm b/irregex-core.scm
index 982f57e4..54413bfe 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -1485,7 +1485,7 @@
            (map (lambda (_)
                   `(/ ,(integer->char #x80) ,(integer->char #xFF)))
                 (zero-to (+ i lo-len))))))
-       (zero-to (- (length hi-ls) lo-len 1)))
+       (zero-to (- (length hi-ls) (+ lo-len 1))))
       (list
        (sre-sequence
         (cons `(/ ,(integer->char
@@ -3752,10 +3752,13 @@
                     matches)))
             (if (not m)
                 (finish i acc)
-                (let* ((end (%irregex-match-end-index m 0))
-                       (acc (kons i m acc)))
-                  (irregex-reset-matches! matches)
-                  (lp end acc))))))))
+                (let ((end (%irregex-match-end-index m 0)))
+                  (if (= end i)
+                      ;; skip one char forward if we match the empty string
+                      (lp (+ end 1) acc)
+                      (let ((acc (kons i m acc)))
+                        (irregex-reset-matches! matches)
+                        (lp end acc))))))))))
 
 (define (irregex-fold irx kons . args)
   (if (not (procedure? kons)) (%irregex-error 'irregex-fold "not a procedure" kons))
@@ -3777,11 +3780,16 @@
           (let ((m (irregex-search/matches irx cnk start i matches)))
             (if (not m)
                 (finish start i acc)
-                (let* ((acc (kons start i m acc))
-                       (end-src (%irregex-match-end-chunk m 0))
-                       (end-index (%irregex-match-end-index m 0)))
-                  (irregex-reset-matches! matches)
-                  (lp end-src end-index acc))))))))
+                (let ((end-src (%irregex-match-end-chunk m 0))
+                      (end-index (%irregex-match-end-index m 0)))
+                  (if (and (eq? end-src start) (= end-index i))
+                      (if (>= end-index ((chunker-get-end cnk) end-src ))
+                          (let ((next ((chunker-get-next cnk) end-src)))
+                            (lp next ((chunker-get-start cnk) next) acc))
+                          (lp end-src (+ end-index 1) acc))
+                      (let ((acc (kons start i m acc)))
+                        (irregex-reset-matches! matches)
+                        (lp end-src end-index acc))))))))))
 
 (define (irregex-fold/chunked irx kons . args)
   (if (not (procedure? kons)) (%irregex-error 'irregex-fold/chunked "not a procedure" kons))
diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm
index a06bc6bd..fd2cb97a 100644
--- a/tests/test-irregex.scm
+++ b/tests/test-irregex.scm
@@ -358,6 +358,11 @@
        rope-chunker
        (rope "bob@test.com and fred@example.com")
        (lambda (src i s) (reverse s))))
+  (test-equal '("poo poo ")
+      (irregex-fold '(* "poo ")
+                    (lambda (i m s) (cons (irregex-match-substring m) s))
+                    '()
+                    "poo poo platter"))
   )
 
 
@@ -499,5 +504,13 @@
 (test-assert (not (irregex-search "(?u:<[あ-ん]*>)" "<ひらgがな>")))
 (test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>")))
 
-(test-end)(test-exit)
+(test-assert (irregex-search "(?u:<[^あ-ん]*>)" "<abc>"))
+(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<あん>")))
+(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<ひらがな>")))
+(test-assert (irregex-search "(?u:<[^あ-ん語]*>)" "<abc>"))
+(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<あん>")))
+(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<ひらがな>")))
+(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>")))
+
+(test-end)
 
Trap