~ chicken-core (chicken-5) 9d905e1acd87b32b744fc308891bd968bf97f10a


commit 9d905e1acd87b32b744fc308891bd968bf97f10a
Author:     Peter Bex <peter.bex@xs4all.nl>
AuthorDate: Tue Nov 27 20:40:04 2012 +0100
Commit:     felix <felix@call-with-current-continuation.org>
CommitDate: Tue Nov 27 21:26:07 2012 +0100

    Irregex: Ensure 'bos is matched only once in irregex/fold; this fixes #686 and updates irregex to 0.9.1 (upstream changesets a6444839100c through 1741bbf14d3e)
    
    Upstream changeset message for this bugfix: Don't bother looping in irregex-fold(/chunked)/fast if the irregex is a searcher. Either it only matches the beginning, or it already consumed the rightmost match.
    
    Signed-off-by: felix <felix@call-with-current-continuation.org>

diff --git a/NEWS b/NEWS
index 39c5bb2e..4ff324f1 100644
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,7 @@
 
 - Core libraries
   - Fixed EINTR handling in process-wait and when reading from file ports.
+  - Irregex is updated to 0.9.1, which includes bugfixes and faster submatches.
 
 4.8.0
 
diff --git a/irregex-core.scm b/irregex-core.scm
index 55316796..c83f8906 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -27,10 +27,14 @@
 ;; performance tuning, but you can only go so far while staying
 ;; portable.  AND-LET*, SRFI-9 records and custom macros would've been
 ;; nice.
+;;
+;; Version 1.0 will be released as a portable R7RS library.
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;; History
 ;;
+;; 0.9.1: 2012/11/27 - various accumulated bugfixes
+;; 0.9.0: 2012/06/03 - Using tags for match extraction from Peter Bex.
 ;; 0.8.3: 2011/12/18 - various accumulated bugfixes
 ;; 0.8.2: 2010/08/28 - (...)? submatch extraction fix and alternate
 ;;                     named submatches from Peter Bex
@@ -3830,7 +3834,12 @@
                       (lp (+ end 1) acc)
                       (let ((acc (kons i m acc)))
                         (irregex-reset-matches! matches)
-                        (lp end acc))))))))))
+                        ;; no need to continue looping if this is a
+                        ;; searcher - it's already consumed the only
+                        ;; available match
+                        (if (flag-set? (irregex-flags irx) ~searcher?)
+                            (finish end acc)
+                            (lp end acc)))))))))))
 
 (define (irregex-fold irx kons . args)
   (if (not (procedure? kons)) (%irregex-error 'irregex-fold "not a procedure" kons))
@@ -3861,7 +3870,12 @@
                           (lp end-src (+ end-index 1) acc))
                       (let ((acc (kons start i m acc)))
                         (irregex-reset-matches! matches)
-                        (lp end-src end-index acc))))))))))
+                        ;; no need to continue looping if this is a
+                        ;; searcher - it's already consumed the only
+                        ;; available match
+                        (if (flag-set? (irregex-flags irx) ~searcher?)
+                            (finish end-src end-index acc)
+                            (lp end-src end-index acc)))))))))))
 
 (define (irregex-fold/chunked irx kons . args)
   (if (not (procedure? kons)) (%irregex-error 'irregex-fold/chunked "not a procedure" kons))
diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm
index cef431c8..7440e186 100644
--- a/tests/test-irregex.scm
+++ b/tests/test-irregex.scm
@@ -78,8 +78,8 @@
    (fast)
    ))
 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; chunked irregex
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ;; chunked irregex
 
 (define (rope . args)
   (map (lambda (x) (if (pair? x) x (list x 0 (string-length x)))) args))
@@ -363,6 +363,18 @@
                     (lambda (i m s) (cons (irregex-match-substring m) s))
                     '()
                     "poo poo platter"))
+  (test-equal "*  x   "
+      (irregex-replace/all
+       (irregex '(: bos #\space) 'backtrack) "   x   " "*"))
+  (test-equal "*  x   "
+      (irregex-replace/all
+       (irregex '(: bos #\space) 'dfa) "   x   " "*"))
+  (test-equal "***x***"
+      (irregex-replace/all
+       (irregex '(: #\space) 'backtrack) "   x   " "*"))
+  (test-equal "***x***"
+      (irregex-replace/all
+       (irregex '(: #\space) 'dfa) "   x   " "*"))
   )
 
 
Trap