~ chicken-core (chicken-5) 01b254c2c8746a9cd88b0fd273329f5b94514b6c
commit 01b254c2c8746a9cd88b0fd273329f5b94514b6c Author: Peter Bex <peter@more-magic.net> AuthorDate: Fri May 14 08:32:48 2021 +0200 Commit: Mario Domenech Goulart <mario@parenteses.org> CommitDate: Sun May 16 15:35:41 2021 +0200 Update irregex to upstream 0.9.9 (rev 3c367082) This fixes an edge case from upstream bug #24 where replacing empty strings with replace/all at the start of a string would incorrectly duplicate the starting character, found by Sandra Snan. Signed-off-by: Mario Domenech Goulart <mario@parenteses.org> diff --git a/NEWS b/NEWS index 1e852d90..c5eb0f03 100644 --- a/NEWS +++ b/NEWS @@ -6,9 +6,13 @@ - Fixed a bug where optimisations for `irregex-match?` would cause runtime errors due to the inlined specialisations not being fully-expanded (see #1690). - - Irregex has been updated to upstream 0.9.8, which fixes behaviour + - Irregex has been updated to upstream 0.9.9, which fixes behaviour of irregex-replace/all with positive lookbehind so all matches are - replaced instead of only the first (reported by Kay Rhodes). + replaced instead of only the first (reported by Kay Rhodes), and + a regression regarding replacing empty matches which was introduced + by the fixes in 0.9.7 (reported by Sandra Snan). Finally, the + http-url shorthand now allows any top-level domain and the old + "top-level-domain" now also supports "edu" (fixed by Sandra Snan). - current-milliseconds has been deprecated in favor of the name current-process-milliseconds, to avoid confusion due to naming of current-milliseconds versus current-seconds, which do something diff --git a/irregex-core.scm b/irregex-core.scm index 42f2a806..8f672333 100644 --- a/irregex-core.scm +++ b/irregex-core.scm @@ -1,6 +1,6 @@ ;;;; irregex.scm -- IrRegular Expressions ;; -;; Copyright (c) 2005-2020 Alex Shinn. All rights reserved. +;; Copyright (c) 2005-2021 Alex Shinn. All rights reserved. ;; BSD-style license: http://synthcode.com/license.txt ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -30,6 +30,7 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; History +;; 0.9.9: 2021/05/14 - more comprehensive fix for repeated empty matches ;; 0.9.8: 2020/07/13 - fix irregex-replace/all with look-behind patterns ;; 0.9.7: 2019/12/31 - more intuitive handling of empty matches in -fold, ;; -replace and -split @@ -2348,8 +2349,8 @@ (domain . (seq domain-atom (+ #\. domain-atom))) ;; XXXX now anything can be a top-level domain, but this is still handy (top-level-domain . (w/nocase (or "arpa" "com" "gov" "mil" "net" "org" - "aero" "biz" "coop" "info" "museum" - "name" "pro" (= 2 alpha)))) + "edu" "aero" "biz" "coop" "info" + "museum" "name" "pro" (= 2 alpha)))) (domain/common . (seq (+ domain-atom #\.) top-level-domain)) ;;(email-local-part . (seq (+ (or (~ #\") string)))) (email-local-part . (+ (or alphanumeric #\_ #\- #\. #\+))) @@ -2360,7 +2361,7 @@ (seq "%" hex-digit hex-digit))) (http-url . (w/nocase "http" (? "s") "://" - (or domain/common ipv4-address) ;; (seq "[" ipv6-address "]") + (or domain ipv4-address) ;; (seq "[" ipv6-address "]") (? ":" (+ numeric)) ;; port ;; path (? "/" (* (or url-char "/")) @@ -3889,9 +3890,9 @@ (if (not (and (integer? end) (exact? end))) (error 'irregex-fold "not an exact integer" end)) (irregex-match-chunker-set! matches irregex-basic-string-chunker) - (let lp ((src init-src) (i start) (acc knil)) + (let lp ((src init-src) (from start) (i start) (acc knil)) (if (>= i end) - (finish i acc) + (finish from acc) (let ((m (irregex-search/matches irx irregex-basic-string-chunker @@ -3900,18 +3901,18 @@ i matches))) (if (not m) - (finish i acc) + (finish from acc) (let ((j (%irregex-match-end-index m 0)) - (acc (kons i m acc))) + (acc (kons from m acc))) (irregex-reset-matches! matches) (cond ((flag-set? (irregex-flags irx) ~consumer?) (finish j acc)) ((= j i) ;; skip one char forward if we match the empty string - (lp (list str (+ j 1) end) (+ j 1) acc)) + (lp (list str j end) j (+ j 1) acc)) (else - (lp (list str j end) j acc)))))))))) + (lp (list str j end) j j acc)))))))))) (define (irregex-fold irx kons . args) (if (not (procedure? kons)) (error 'irregex-fold "not a procedure" kons)) @@ -3968,15 +3969,11 @@ (irregex-fold/fast irx (lambda (i m acc) - (let* ((m-start (%irregex-match-start-index m 0)) - (res (if (>= i m-start) - (append (irregex-apply-match m o) acc) - (append (irregex-apply-match m o) - (cons (substring str i m-start) acc))))) - ;; include the skipped char on empty matches - (if (= i (%irregex-match-end-index m 0)) - (cons (substring str i (+ i 1)) res) - res))) + (let ((m-start (%irregex-match-start-index m 0))) + (if (>= i m-start) + (append (irregex-apply-match m o) acc) + (append (irregex-apply-match m o) + (cons (substring str i m-start) acc))))) '() str (lambda (i acc) @@ -4037,9 +4034,6 @@ irx (lambda (i m a) (cond - ((= i (%irregex-match-end-index m 0)) - ;; empty match, include the skipped char to rejoin in finish - (cons (string-ref str i) a)) ((= i (%irregex-match-start-index m 0)) a) (else diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm index 18582809..f1aefc21 100644 --- a/tests/test-irregex.scm +++ b/tests/test-irregex.scm @@ -400,7 +400,7 @@ (rope "bob@test.com and fred@example.com") (lambda (src i s) (reverse s)))) (test-equal '("poo poo ") - (irregex-fold '(* "poo ") + (irregex-fold '(+ "poo ") (lambda (i m s) (if (< i (irregex-match-end-index m 0)) (cons (irregex-match-substring m) s) @@ -438,6 +438,9 @@ (test-equal "x- y- z-" (irregex-replace/all '(: (look-behind (or "x" "y" "z")) "a") "xa ya za" "-")) + (test-equal "any gosh darned string" + (irregex-replace/all '(: bos (* whitespace)) + "any gosh darned string" "")) (test-equal '("foo" " " "foo" " " "b" "a" "r" " " "foo") (irregex-extract '(or (: bow "foo" eow) any) "foo foo bar foo")) (test-equal '("f" "o" "o" "b" "a" "r" "b" "a" "z")Trap