~ chicken-core (chicken-5) 7d0520e145bc251abe2499af8149ff271ede6b29
commit 7d0520e145bc251abe2499af8149ff271ede6b29 Author: Peter Bex <peter.bex@xs4all.nl> AuthorDate: Wed Jul 13 00:44:41 2011 +0200 Commit: Peter Bex <peter.bex@xs4all.nl> CommitDate: Wed Jul 13 00:44:41 2011 +0200 Apply upstream irregex changesets 65f3099f0aa6 and 4b9d8131ca52 (fixes the 'help my irregex is half-matched' part of #636. Still doesn't explain why sometimes the input does get rejected on some machines) diff --git a/irregex-core.scm b/irregex-core.scm index cfa3a16c..4f4656b5 100644 --- a/irregex-core.scm +++ b/irregex-core.scm @@ -2653,6 +2653,11 @@ (bit (remainder i 24))) (not (zero? (bit-and (vector-ref mst cell) (bit-shl 1 bit)))))) +(define (nfa-multi-state-contains-only? mst i) + (let ((cell (quotient i 24)) + (bit (remainder i 24))) + (= (vector-ref mst cell) (bit-shl 1 bit)))) + (define (nfa-multi-state-add! mst i) (let ((cell (quotient i 24)) (bit (remainder i 24))) @@ -2766,8 +2771,13 @@ (let lp ((ls existing) (res '())) (cond ((null? ls) - (cond ; First try to find a group that includes this state - ((find (lambda (x) (nfa-multi-state-contains? (cdr x) state)) existing) => + (cond + ;; First try to find a group that includes *only* this state. + ;; TRICKY!: If it contains other states too, we will end up in trouble + ;; later on if the group needs to be broken up because of overlapping + ;; csets, since then you don't know what parts of the overlap "belong" + ;; to the state we are about to add or the one that was already there. + ((find (lambda (x) (nfa-multi-state-contains-only? (cdr x) state)) existing) => (lambda (existing-state) ; If found, merge charsets with it (set-car! existing-state (cset-union (car existing-state) elt)) existing)) @@ -3615,9 +3625,11 @@ (let ((a-range (car a)) (b-range (car b))) (cond - ((char<? (next-char (cdr a-range)) (car b-range)) + ;; Can't use next-char here since it will cause an error if we are + ;; comparing a cset with the maximum character as high char. + ((< (+ (char->integer (cdr a-range)) 1) (char->integer (car b-range))) (union-range (cdr a) b (cons a-range res))) - ((char>? (car a-range) (next-char (cdr b-range))) + ((> (char->integer (car a-range)) (+ (char->integer (cdr b-range)) 1)) (union-range (cdr b) a (cons b-range res))) ((char>=? (cdr a-range) (car b-range)) (union-range (cons (char-ranges-union a-range b-range) (cdr a)) diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm index 8834ab98..23fbcb1e 100644 --- a/tests/test-irregex.scm +++ b/tests/test-irregex.scm @@ -226,6 +226,7 @@ (test-assert (not (irregex-search '(: "ab" (~ any)) "abc"))) (test-assert (not (irregex-search '("") "abc"))) (test-assert (not (irregex-search '(: "ab" ("")) "abc"))) + (test-assert (not (irregex-search '(: (+ print) white) "abc"))) ) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Trap