~ chicken-core (chicken-5) 0c61a04d2634920b7e587e4fa1bb6fabd432a01f


commit 0c61a04d2634920b7e587e4fa1bb6fabd432a01f
Author:     Peter Bex <peter.bex@xs4all.nl>
AuthorDate: Sun Sep 23 15:43:00 2012 +0200
Commit:     felix <felix@call-with-current-continuation.org>
CommitDate: Sun Sep 23 23:30:13 2012 +0200

    Irregex: Fix problem with suffixes returned while searching instead of matching (thanks to Sven Hartrumpf for reporting this) (upstream changeset afae3f6a8f8a)
    
    Now, finalizers are run whenever we move from an accepting state to a
    non-accepting state.  This allows memory slots to be modified even if
    we never reach an accepting state, returning a match found earlier.
    
    Signed-off-by: felix <felix@call-with-current-continuation.org>

diff --git a/irregex-core.scm b/irregex-core.scm
index edfbf01d..ce3d2e16 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -2189,8 +2189,7 @@
                   (lp1 next (get-start next) state res-src res-index finalizer)
                   (and index
                        (%irregex-match-end-chunk matches index)
-                       (or (not submatches?)
-                           (finalize! finalizer memory matches))
+                       (or (not finalizer) (finalize! finalizer memory matches))
                        #t))))
            (else
             (let* ((ch (string-ref str i))
@@ -2201,37 +2200,38 @@
                                (cdr state))))
               (cond
                (cell
-                (cond
-                 (submatches?
-                  (let ((cmds (dfa-cell-commands dfa cell)))
-                    (for-each (lambda (s)
-                                (let ((slot (vector-ref memory (cdr s)))
-                                      (chunk&position (cons src (+ i 1))))
-                                  (vector-set! slot (car s) chunk&position)))
-                              (cdr cmds))
-                    (for-each (lambda (c)
-                                (let* ((tag (vector-ref c 0))
-                                       (ss (vector-ref memory (vector-ref c 1)))
-                                       (ds (vector-ref memory (vector-ref c 2))))
-                                  (vector-set! ds tag (vector-ref ss tag))))
-                              (car cmds)))))
-                (let ((next (dfa-next-state dfa cell)))
-                 (cond
-                  ((dfa-finalizer dfa next) =>
-                   (lambda (new-finalizer)
-                     (lp2 (+ i 1) next src (+ i 1) new-finalizer)))
-                  (else (lp2 (+ i 1) next res-src res-index finalizer)))))
+                (let* ((next (dfa-next-state dfa cell))
+                       (new-finalizer (dfa-finalizer dfa next)))
+                  (cond
+                   (submatches?
+                    (let ((cmds (dfa-cell-commands dfa cell)))
+                      ;; Save match when we're moving from accepting state to
+                      ;; rejecting state; this could be the last accepting one.
+                      (cond ((and finalizer (not new-finalizer))
+                             (finalize! finalizer memory matches)))
+                      (for-each (lambda (s)
+                                  (let ((slot (vector-ref memory (cdr s)))
+                                        (chunk&position (cons src (+ i 1))))
+                                    (vector-set! slot (car s) chunk&position)))
+                                (cdr cmds))
+                      (for-each (lambda (c)
+                                  (let* ((tag (vector-ref c 0))
+                                         (ss (vector-ref memory (vector-ref c 1)))
+                                         (ds (vector-ref memory (vector-ref c 2))))
+                                    (vector-set! ds tag (vector-ref ss tag))))
+                                (car cmds)))))
+                  (if new-finalizer
+                      (lp2 (+ i 1) next src (+ i 1) new-finalizer)
+                      (lp2 (+ i 1) next res-src res-index #f))))
                (res-src
                 (cond
                  (index
                   (irregex-match-end-chunk-set! matches index res-src)
                   (irregex-match-end-index-set! matches index res-index)))
-                (cond (submatches?
-                       (finalize! finalizer memory matches)))
+                (cond (finalizer (finalize! finalizer memory matches)))
                 #t)
                ((and index (%irregex-match-end-chunk matches index))
-                (cond (submatches?
-                       (finalize! finalizer memory matches)))
+                (cond (finalizer (finalize! finalizer memory matches)))
                 #t)
                (else
                 #f))))))))))
diff --git a/tests/re-tests.txt b/tests/re-tests.txt
index 1cbc3793..7b233572 100644
--- a/tests/re-tests.txt
+++ b/tests/re-tests.txt
@@ -104,6 +104,7 @@ a([bc]*)(c+d)	abcd	y	&-\1-\2	abcd-b-cd
 a[bcd]*dcdcde	adcdcde	y	&	adcdcde
 a[bcd]+dcdcde	adcdcde	n	-	-
 (ab|a)b*c	abc	y	&-\1	abc-ab
+(.*)b	abc	y	&-\1	ab-a
 ((a)(b)c)(d)	abcd	y	\1-\2-\3-\4	abc-a-b-d
 ((a)(b)?c)(d)	abcd	y	\1-\2-\3-\4	abc-a-b-d
 ((a)(b)?c)(d)	acd	y	\1-\2-\3-\4	ac-a--d
Trap