~ chicken-core (chicken-5) 7d0520e145bc251abe2499af8149ff271ede6b29


commit 7d0520e145bc251abe2499af8149ff271ede6b29
Author:     Peter Bex <peter.bex@xs4all.nl>
AuthorDate: Wed Jul 13 00:44:41 2011 +0200
Commit:     Peter Bex <peter.bex@xs4all.nl>
CommitDate: Wed Jul 13 00:44:41 2011 +0200

    Apply upstream irregex changesets 65f3099f0aa6 and 4b9d8131ca52 (fixes the 'help my irregex is half-matched' part of #636.  Still doesn't explain why sometimes the input does get rejected on some machines)

diff --git a/irregex-core.scm b/irregex-core.scm
index cfa3a16c..4f4656b5 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -2653,6 +2653,11 @@
         (bit (remainder i 24)))
     (not (zero? (bit-and (vector-ref mst cell) (bit-shl 1 bit))))))
 
+(define (nfa-multi-state-contains-only? mst i)
+  (let ((cell (quotient i 24))
+        (bit (remainder i 24)))
+    (= (vector-ref mst cell) (bit-shl 1 bit))))
+
 (define (nfa-multi-state-add! mst i)
   (let ((cell (quotient i 24))
         (bit (remainder i 24)))
@@ -2766,8 +2771,13 @@
   (let lp ((ls existing) (res '()))
     (cond
      ((null? ls)
-      (cond       ; First try to find a group that includes this state
-       ((find (lambda (x) (nfa-multi-state-contains? (cdr x) state)) existing) =>
+      (cond
+       ;; First try to find a group that includes *only* this state.
+       ;; TRICKY!: If it contains other states too, we will end up in trouble
+       ;; later on if the group needs to be broken up because of overlapping
+       ;; csets, since then you don't know what parts of the overlap "belong"
+       ;; to the state we are about to add or the one that was already there.
+       ((find (lambda (x) (nfa-multi-state-contains-only? (cdr x) state)) existing) =>
         (lambda (existing-state)    ; If found, merge charsets with it
           (set-car! existing-state (cset-union (car existing-state) elt))
           existing))
@@ -3615,9 +3625,11 @@
       (let ((a-range (car a))
             (b-range (car b)))
         (cond
-         ((char<? (next-char (cdr a-range)) (car b-range))
+         ;; Can't use next-char here since it will cause an error if we are
+         ;; comparing a cset with the maximum character as high char.
+         ((< (+ (char->integer (cdr a-range)) 1) (char->integer (car b-range)))
           (union-range (cdr a) b (cons a-range res)))
-         ((char>? (car a-range) (next-char (cdr b-range)))
+         ((> (char->integer (car a-range)) (+ (char->integer (cdr b-range)) 1))
           (union-range (cdr b) a (cons b-range res)))
          ((char>=? (cdr a-range) (car b-range))
           (union-range (cons (char-ranges-union a-range b-range) (cdr a))
diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm
index 8834ab98..23fbcb1e 100644
--- a/tests/test-irregex.scm
+++ b/tests/test-irregex.scm
@@ -226,6 +226,7 @@
   (test-assert (not (irregex-search '(: "ab" (~ any)) "abc")))
   (test-assert (not (irregex-search '("") "abc")))
   (test-assert (not (irregex-search '(: "ab" ("")) "abc")))
+  (test-assert (not (irregex-search '(: (+ print) white) "abc")))
   )
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Trap