~ chicken-core (chicken-5) 7d0520e145bc251abe2499af8149ff271ede6b29
commit 7d0520e145bc251abe2499af8149ff271ede6b29
Author: Peter Bex <peter.bex@xs4all.nl>
AuthorDate: Wed Jul 13 00:44:41 2011 +0200
Commit: Peter Bex <peter.bex@xs4all.nl>
CommitDate: Wed Jul 13 00:44:41 2011 +0200
Apply upstream irregex changesets 65f3099f0aa6 and 4b9d8131ca52 (fixes the 'help my irregex is half-matched' part of #636. Still doesn't explain why sometimes the input does get rejected on some machines)
diff --git a/irregex-core.scm b/irregex-core.scm
index cfa3a16c..4f4656b5 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -2653,6 +2653,11 @@
(bit (remainder i 24)))
(not (zero? (bit-and (vector-ref mst cell) (bit-shl 1 bit))))))
+(define (nfa-multi-state-contains-only? mst i)
+ (let ((cell (quotient i 24))
+ (bit (remainder i 24)))
+ (= (vector-ref mst cell) (bit-shl 1 bit))))
+
(define (nfa-multi-state-add! mst i)
(let ((cell (quotient i 24))
(bit (remainder i 24)))
@@ -2766,8 +2771,13 @@
(let lp ((ls existing) (res '()))
(cond
((null? ls)
- (cond ; First try to find a group that includes this state
- ((find (lambda (x) (nfa-multi-state-contains? (cdr x) state)) existing) =>
+ (cond
+ ;; First try to find a group that includes *only* this state.
+ ;; TRICKY!: If it contains other states too, we will end up in trouble
+ ;; later on if the group needs to be broken up because of overlapping
+ ;; csets, since then you don't know what parts of the overlap "belong"
+ ;; to the state we are about to add or the one that was already there.
+ ((find (lambda (x) (nfa-multi-state-contains-only? (cdr x) state)) existing) =>
(lambda (existing-state) ; If found, merge charsets with it
(set-car! existing-state (cset-union (car existing-state) elt))
existing))
@@ -3615,9 +3625,11 @@
(let ((a-range (car a))
(b-range (car b)))
(cond
- ((char<? (next-char (cdr a-range)) (car b-range))
+ ;; Can't use next-char here since it will cause an error if we are
+ ;; comparing a cset with the maximum character as high char.
+ ((< (+ (char->integer (cdr a-range)) 1) (char->integer (car b-range)))
(union-range (cdr a) b (cons a-range res)))
- ((char>? (car a-range) (next-char (cdr b-range)))
+ ((> (char->integer (car a-range)) (+ (char->integer (cdr b-range)) 1))
(union-range (cdr b) a (cons b-range res)))
((char>=? (cdr a-range) (car b-range))
(union-range (cons (char-ranges-union a-range b-range) (cdr a))
diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm
index 8834ab98..23fbcb1e 100644
--- a/tests/test-irregex.scm
+++ b/tests/test-irregex.scm
@@ -226,6 +226,7 @@
(test-assert (not (irregex-search '(: "ab" (~ any)) "abc")))
(test-assert (not (irregex-search '("") "abc")))
(test-assert (not (irregex-search '(: "ab" ("")) "abc")))
+ (test-assert (not (irregex-search '(: (+ print) white) "abc")))
)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Trap