~ chicken-core (chicken-5) b0600e72f5158adfcab13d1949e5dfaa0aa72ef4
commit b0600e72f5158adfcab13d1949e5dfaa0aa72ef4
Author: Peter Bex <peter@more-magic.net>
AuthorDate: Tue Jul 6 15:15:34 2021 +0200
Commit: Mario Domenech Goulart <mario@parenteses.org>
CommitDate: Tue Jul 6 21:25:36 2021 +0200
Bump irregex to upstream commit 29334af, bringing us to version 0.9.10
This fixes upstream ticket #25, where "bol" handled newlines
inconsistently, so that successive matches could overlap when a
pattern matched the same string multiple times.
Signed-off-by: Mario Domenech Goulart <mario@parenteses.org>
diff --git a/NEWS b/NEWS
index 53a40f0f..2e254e48 100644
--- a/NEWS
+++ b/NEWS
@@ -6,15 +6,17 @@
- Fixed a bug where optimisations for `irregex-match?` would cause
runtime errors due to the inlined specialisations not being
fully-expanded (see #1690).
- - Irregex has been updated to upstream 0.9.9, which fixes behaviour
+ - Irregex has been updated to upstream 0.9.10, which fixes behaviour
of irregex-replace/all with positive lookbehind so all matches are
replaced instead of only the first (reported by Kay Rhodes), and
a regression regarding replacing empty matches which was introduced
by the fixes in 0.9.7 (reported by Sandra Snan). Also, the
http-url shorthand now allows any top-level domain and the old
"top-level-domain" now also supports "edu" (fixed by Sandra Snan).
- Finally, a problem was fixed with capturing groups inside a kleene
+ Also, a problem was fixed with capturing groups inside a kleene
star, which could sometimes return incorrect parts of the match.
+ Finally, "bol" handling was fixed to handle newlines consistently
+ so that multiple matches don't overlap (reported by Sandra Snan).
- current-milliseconds has been deprecated in favor of the name
current-process-milliseconds, to avoid confusion due to naming
of current-milliseconds versus current-seconds, which do something
diff --git a/irregex-core.scm b/irregex-core.scm
index f86b7992..55e9a6c0 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -30,6 +30,10 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; History
+;; 0.9.10: 2021/07/06 - fixes for submatches under kleene star, empty seqs
+;; in alternations, and bol in folds for backtracking
+;; matcher (thanks John Clements and snan for reporting
+;; and Peter Bex for fixing)
;; 0.9.9: 2021/05/14 - more comprehensive fix for repeated empty matches
;; 0.9.8: 2020/07/13 - fix irregex-replace/all with look-behind patterns
;; 0.9.7: 2019/12/31 - more intuitive handling of empty matches in -fold,
@@ -3508,9 +3512,10 @@
(fail))))
((bol)
(lambda (cnk init src str i end matches fail)
- (if (or (and (eq? src (car init)) (eqv? i (cdr init)))
- (and (> i ((chunker-get-start cnk) src))
- (eqv? #\newline (string-ref str (- i 1)))))
+ (if (let ((ch (if (> i ((chunker-get-start cnk) src))
+ (string-ref str (- i 1))
+ (chunker-prev-char cnk init src))))
+ (or (not ch) (eqv? #\newline ch)))
(next cnk init src str i end matches fail)
(fail))))
((bow)
@@ -3908,13 +3913,14 @@
matches)))
(if (not m)
(finish from acc)
- (let ((j (%irregex-match-end-index m 0))
+ (let ((j-start (%irregex-match-start-index m 0))
+ (j (%irregex-match-end-index m 0))
(acc (kons from m acc)))
(irregex-reset-matches! matches)
(cond
((flag-set? (irregex-flags irx) ~consumer?)
(finish j acc))
- ((= j i)
+ ((= j j-start)
;; skip one char forward if we match the empty string
(lp (list str j end) j (+ j 1) acc))
(else
diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm
index 5cf5b685..0888f09b 100644
--- a/tests/test-irregex.scm
+++ b/tests/test-irregex.scm
@@ -451,6 +451,10 @@
(irregex-extract (irregex "[aeiou]*") "foobarbaz"))
(test-equal '("Line 1\n" "Line 2\n" "Line 3")
(irregex-split 'bol "Line 1\nLine 2\nLine 3"))
+ (test-equal '("foo\n" "bar\n" "baz\n")
+ (irregex-extract '(: bol (+ alpha) newline) "\nfoo\nbar\nbaz\n"))
+ (test-equal '("\nblah" "\nblah" "\nblah")
+ (irregex-extract '(: newline "blah" eol) "\nblah\nblah\nblah\n"))
)
Trap