~ chicken-core (chicken-5) 01b254c2c8746a9cd88b0fd273329f5b94514b6c
commit 01b254c2c8746a9cd88b0fd273329f5b94514b6c
Author: Peter Bex <peter@more-magic.net>
AuthorDate: Fri May 14 08:32:48 2021 +0200
Commit: Mario Domenech Goulart <mario@parenteses.org>
CommitDate: Sun May 16 15:35:41 2021 +0200
Update irregex to upstream 0.9.9 (rev 3c367082)
This fixes an edge case from upstream bug #24 where replacing empty
strings with replace/all at the start of a string would incorrectly
duplicate the starting character, found by Sandra Snan.
Signed-off-by: Mario Domenech Goulart <mario@parenteses.org>
diff --git a/NEWS b/NEWS
index 1e852d90..c5eb0f03 100644
--- a/NEWS
+++ b/NEWS
@@ -6,9 +6,13 @@
- Fixed a bug where optimisations for `irregex-match?` would cause
runtime errors due to the inlined specialisations not being
fully-expanded (see #1690).
- - Irregex has been updated to upstream 0.9.8, which fixes behaviour
+ - Irregex has been updated to upstream 0.9.9, which fixes behaviour
of irregex-replace/all with positive lookbehind so all matches are
- replaced instead of only the first (reported by Kay Rhodes).
+ replaced instead of only the first (reported by Kay Rhodes), and
+ a regression regarding replacing empty matches which was introduced
+ by the fixes in 0.9.7 (reported by Sandra Snan). Finally, the
+ http-url shorthand now allows any top-level domain and the old
+ "top-level-domain" now also supports "edu" (fixed by Sandra Snan).
- current-milliseconds has been deprecated in favor of the name
current-process-milliseconds, to avoid confusion due to naming
of current-milliseconds versus current-seconds, which do something
diff --git a/irregex-core.scm b/irregex-core.scm
index 42f2a806..8f672333 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -1,6 +1,6 @@
;;;; irregex.scm -- IrRegular Expressions
;;
-;; Copyright (c) 2005-2020 Alex Shinn. All rights reserved.
+;; Copyright (c) 2005-2021 Alex Shinn. All rights reserved.
;; BSD-style license: http://synthcode.com/license.txt
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -30,6 +30,7 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; History
+;; 0.9.9: 2021/05/14 - more comprehensive fix for repeated empty matches
;; 0.9.8: 2020/07/13 - fix irregex-replace/all with look-behind patterns
;; 0.9.7: 2019/12/31 - more intuitive handling of empty matches in -fold,
;; -replace and -split
@@ -2348,8 +2349,8 @@
(domain . (seq domain-atom (+ #\. domain-atom)))
;; XXXX now anything can be a top-level domain, but this is still handy
(top-level-domain . (w/nocase (or "arpa" "com" "gov" "mil" "net" "org"
- "aero" "biz" "coop" "info" "museum"
- "name" "pro" (= 2 alpha))))
+ "edu" "aero" "biz" "coop" "info"
+ "museum" "name" "pro" (= 2 alpha))))
(domain/common . (seq (+ domain-atom #\.) top-level-domain))
;;(email-local-part . (seq (+ (or (~ #\") string))))
(email-local-part . (+ (or alphanumeric #\_ #\- #\. #\+)))
@@ -2360,7 +2361,7 @@
(seq "%" hex-digit hex-digit)))
(http-url . (w/nocase
"http" (? "s") "://"
- (or domain/common ipv4-address) ;; (seq "[" ipv6-address "]")
+ (or domain ipv4-address) ;; (seq "[" ipv6-address "]")
(? ":" (+ numeric)) ;; port
;; path
(? "/" (* (or url-char "/"))
@@ -3889,9 +3890,9 @@
(if (not (and (integer? end) (exact? end)))
(error 'irregex-fold "not an exact integer" end))
(irregex-match-chunker-set! matches irregex-basic-string-chunker)
- (let lp ((src init-src) (i start) (acc knil))
+ (let lp ((src init-src) (from start) (i start) (acc knil))
(if (>= i end)
- (finish i acc)
+ (finish from acc)
(let ((m (irregex-search/matches
irx
irregex-basic-string-chunker
@@ -3900,18 +3901,18 @@
i
matches)))
(if (not m)
- (finish i acc)
+ (finish from acc)
(let ((j (%irregex-match-end-index m 0))
- (acc (kons i m acc)))
+ (acc (kons from m acc)))
(irregex-reset-matches! matches)
(cond
((flag-set? (irregex-flags irx) ~consumer?)
(finish j acc))
((= j i)
;; skip one char forward if we match the empty string
- (lp (list str (+ j 1) end) (+ j 1) acc))
+ (lp (list str j end) j (+ j 1) acc))
(else
- (lp (list str j end) j acc))))))))))
+ (lp (list str j end) j j acc))))))))))
(define (irregex-fold irx kons . args)
(if (not (procedure? kons)) (error 'irregex-fold "not a procedure" kons))
@@ -3968,15 +3969,11 @@
(irregex-fold/fast
irx
(lambda (i m acc)
- (let* ((m-start (%irregex-match-start-index m 0))
- (res (if (>= i m-start)
- (append (irregex-apply-match m o) acc)
- (append (irregex-apply-match m o)
- (cons (substring str i m-start) acc)))))
- ;; include the skipped char on empty matches
- (if (= i (%irregex-match-end-index m 0))
- (cons (substring str i (+ i 1)) res)
- res)))
+ (let ((m-start (%irregex-match-start-index m 0)))
+ (if (>= i m-start)
+ (append (irregex-apply-match m o) acc)
+ (append (irregex-apply-match m o)
+ (cons (substring str i m-start) acc)))))
'()
str
(lambda (i acc)
@@ -4037,9 +4034,6 @@
irx
(lambda (i m a)
(cond
- ((= i (%irregex-match-end-index m 0))
- ;; empty match, include the skipped char to rejoin in finish
- (cons (string-ref str i) a))
((= i (%irregex-match-start-index m 0))
a)
(else
diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm
index 18582809..f1aefc21 100644
--- a/tests/test-irregex.scm
+++ b/tests/test-irregex.scm
@@ -400,7 +400,7 @@
(rope "bob@test.com and fred@example.com")
(lambda (src i s) (reverse s))))
(test-equal '("poo poo ")
- (irregex-fold '(* "poo ")
+ (irregex-fold '(+ "poo ")
(lambda (i m s)
(if (< i (irregex-match-end-index m 0))
(cons (irregex-match-substring m) s)
@@ -438,6 +438,9 @@
(test-equal "x- y- z-"
(irregex-replace/all '(: (look-behind (or "x" "y" "z")) "a")
"xa ya za" "-"))
+ (test-equal "any gosh darned string"
+ (irregex-replace/all '(: bos (* whitespace))
+ "any gosh darned string" ""))
(test-equal '("foo" " " "foo" " " "b" "a" "r" " " "foo")
(irregex-extract '(or (: bow "foo" eow) any) "foo foo bar foo"))
(test-equal '("f" "o" "o" "b" "a" "r" "b" "a" "z")
Trap