~ chicken-core (chicken-5) 7d4eb862e99e3c6914a10e1ec843f35a92a06553


commit 7d4eb862e99e3c6914a10e1ec843f35a92a06553
Author:     Peter Bex <Peter.Bex@xs4all.nl>
AuthorDate: Fri Sep 24 23:13:42 2010 +0200
Commit:     Peter Bex <Peter.Bex@xs4all.nl>
CommitDate: Fri Sep 24 23:13:42 2010 +0200

    Apply upstream changeset 40233db2c0cb (Convert all character-set matching stuff to use the cset API, simplifying the code considerably)

diff --git a/irregex-core.scm b/irregex-core.scm
index ca4d327d..f688421b 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -1195,65 +1195,45 @@
   (let* ((end (string-length str))
          (invert? (and (< start end) (eqv? #\^ (string-ref str start))))
          (utf8? (flag-set? flags ~utf8?)))
-    (define (go i chars ranges)
+    (define (go i prev-char cset)
       (if (>= i end)
           (%irregex-error "incomplete char set" str i end)
           (let ((c (string-ref str i)))
             (case c
               ((#\])
-               (if (and (null? chars) (null? ranges))
-                   (go (+ i 1) (cons #\] chars) ranges)
-                   (let ((ci? (flag-set? flags ~case-insensitive?))
-                         (hi-chars (if utf8? (filter high-char? chars) '()))
-                         (chars (if utf8? (remove high-char? chars) chars)))
+               (if (cset-empty? cset)
+                   (go (+ i 1) #\] (cset-adjoin cset #\]))
+                   (let ((ci? (flag-set? flags ~case-insensitive?)))
                      (list
-                      ((lambda (res)
-                         (if invert? (cons '~ res) (sre-alternate res)))
-                       (append
-                        hi-chars
-                        (if (pair? chars)
-                            (list
-                             (list (list->string
-                                    ((if ci?
-                                         cset-case-insensitive
-                                         (lambda (x) x))
-                                     (reverse chars)))))
-                            '())
-                        (if (pair? ranges)
-                            (let ((res (if ci?
-                                           (cset-case-insensitive
-                                            (plist->cset ranges))
-                                           (plist->cset ranges))))
-                              (list (cons '/ (cset->plist res))))
-                            '())))
+                      (let ((res (if ci? (cset-case-insensitive cset) cset)))
+                        (cset->sre (if invert? (cset-complement res) res)))
                       i))))
               ((#\-)
                (cond
                 ((or (= i start)
                      (and (= i (+ start 1)) (eqv? #\^ (string-ref str start)))
                      (eqv? #\] (string-ref str (+ i 1))))
-                 (go (+ i 1) (cons c chars) ranges))
-                ((null? chars)
+                 (go (+ i 1) c (cset-adjoin cset c)))
+                ((not prev-char)
                  (%irregex-error "bad char-set"))
                 (else
-                 (let* ((c1 (car chars))
-                        (c2 (string-ref str (+ i 1))))
+                 (let ((char (string-ref str (+ i 1))))
                    (apply
-                    (lambda (c2 j)
-                      (if (char<? c2 c1)
-                          (%irregex-error "inverted range in char-set" c1 c2)
-                          (go j (cdr chars) (cons c1 (cons c2 ranges)))))
+                    (lambda (c j)
+                      (if (char<? c prev-char)
+                          (error "inverted range in char-set" prev-char c)
+                          (go j #f (cset-union cset (range->cset prev-char c)))))
                     (cond
-                     ((and (eqv? #\\ c2) (assv c2 posix-escape-sequences))
+                     ((and (eqv? #\\ char) (assv char posix-escape-sequences))
                       => (lambda (x) (list (cdr x) (+ i 3))))
-                     ((and (eqv? #\\ c2)
+                     ((and (eqv? #\\ char)
                            (eqv? (string-ref str (+ i 2)) #\x))
                       (string-parse-hex-escape str (+ i 3) end))
-                     ((and utf8? (<= #x80 (char->integer c2) #xFF))
-                      (let ((len (utf8-start-char->length c2)))
+                     ((and utf8? (<= #x80 (char->integer char) #xFF))
+                      (let ((len (utf8-start-char->length char)))
                         (list (utf8-string-ref str (+ i 1) len) (+ i 1 len))))
                      (else
-                      (list c2 (+ i 2)))))))))
+                      (list char (+ i 2)))))))))
               ((#\[)
                (let* ((inv? (eqv? #\^ (string-ref str (+ i 1))))
                       (i2 (if inv? (+ i 2) (+ i 1))))
@@ -1262,76 +1242,45 @@
                     (let ((j (string-scan-char str #\: (+ i2 1))))
                       (if (or (not j) (not (eqv? #\] (string-ref str (+ j 1)))))
                           (%irregex-error "incomplete character class" str)
-                          (let* ((cset (sre->cset
-                                        (string->symbol
-                                         (substring str (+ i2 1) j))))
-                                 (cset (if inv? (cset-complement cset) cset)))
-                            ;; REFACTORME
-                            (let split ((cset-as-plist (cset->plist cset))
-                                        (cset-single-chars '())
-                                        (cset-ranges '()))
-                              (cond
-                               ((null? cset-as-plist)
-                                (go (+ j 2)
-                                    (append cset-single-chars chars)
-                                    (append cset-ranges ranges)))
-                               ((char=? (car cset-as-plist) (cadr cset-as-plist))
-                                (split (cddr cset-as-plist)
-                                       (cons (car cset-as-plist) cset-single-chars)
-                                       cset-ranges))
-                               (else (split (cddr cset-as-plist)
-                                            cset-single-chars
-                                            (cons (car cset-as-plist)
-                                                  (cons (cadr cset-as-plist)
-                                                        cset-ranges))))))))))
+                          (let* ((class (sre->cset
+                                         (string->symbol
+                                          (substring str (+ i2 1) j))))
+                                 (class (if inv? (cset-complement class) class)))
+                            (go (+ j 2) #f (cset-union cset class))))))
                    ((#\= #\.)
                     (%irregex-error "collating sequences not supported" str))
                    (else
-                    (go (+ i 1) (cons #\[ chars) ranges)))))
+                    (go (+ i 1) #\[ (cset-adjoin cset #\[))))))
               ((#\\)
                (let ((c (string-ref str (+ i 1))))
                  (case c
                    ((#\d #\D #\s #\S #\w #\W)
-                    (let ((cset (sre->cset (string->sre (string #\\ c)))))
-                      ;; REFACTORME
-                      (let split ((cset-as-plist (cset->plist cset))
-                                  (cset-single-chars '())
-                                  (cset-ranges '()))
-                        (cond
-                         ((null? cset-as-plist)
-                          (go (+ i 2)
-                              (append cset-single-chars chars)
-                              (append cset-ranges ranges)))
-                         ((char=? (car cset-as-plist) (cadr cset-as-plist))
-                          (split (cddr cset-as-plist)
-                                 (cons (car cset-as-plist) cset-single-chars)
-                                 cset-ranges))
-                         (else (split (cddr cset-as-plist)
-                                       cset-single-chars
-                                       (cons (car cset-as-plist)
-                                             (cons (cadr cset-as-plist)
-                                                   cset-ranges))))))))
+                    (go (+ i 2) #f
+                        (cset-union cset
+                                    (sre->cset (string->sre (string #\\ c))))))
                    ((#\x)
                     (apply
                      (lambda (ch j)
-                       (go j (cons ch chars) ranges))
+                       (go j ch (cset-adjoin cset ch)))
                      (string-parse-hex-escape str (+ i 2) end)))
                    (else
                     (let ((c (cond ((assv c posix-escape-sequences) => cdr)
                                    (else c))))
-                      (go (+ i 2) (cons c chars) ranges))))))
+                      (go (+ i 2) c (cset-adjoin cset c)))))))
               (else
                (if (and utf8? (<= #x80 (char->integer c) #xFF))
                    (let ((len (utf8-start-char->length c)))
                      (go (+ i len)
-                         (cons (utf8-string-ref str i len) chars)
-                         ranges))
-                   (go (+ i 1) (cons c chars) ranges)))))))
+                         (utf8-string-ref str i len)
+                         (cset-adjoin cset (utf8-string-ref str i len))))
+                   (go (+ i 1) c (cset-adjoin cset c))))))))
     (if invert?
         (go (+ start 1)
-            (if (flag-set? flags ~multi-line?) '(#\newline) '())
-            '())
-        (go start '() '()))))
+            #f
+            (if (flag-set? flags ~multi-line?)
+                (char->cset #\newline)
+                (make-cset)))
+        (go start #f (make-cset)))))
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;; UTF-8 Utilities
@@ -2419,9 +2368,8 @@
       (define (add-char-state! next ch)
         (let ((alt (char-altcase ch)))
           (if (flag-set? flags ~case-insensitive?)
-              (extend-state! next (cset-union (range->cset ch ch)
-                                              (range->cset alt alt)))
-              (extend-state! next (range->cset ch ch)))))
+              (extend-state! next (cset-union (char->cset ch) (char->cset alt)))
+              (extend-state! next (char->cset ch)))))
       (if (null? ls)
           next
           (cond
@@ -3543,6 +3491,7 @@
 
 (define (make-cset) (vector))
 (define (range->cset from to) (vector (cons from to)))
+(define (char->cset ch) (vector (cons ch ch)))
 (define (cset-empty? cs) (zero? (vector-length cs)))
 (define (maybe-cset->char cs)
   (if (and (= (vector-length cs) 1)
@@ -3578,7 +3527,7 @@
 
 (define (string->cset s)
   (fold (lambda (ch cs)
-          (cset-union (range->cset ch ch) cs))
+          (cset-adjoin cs ch))
         (make-cset)
         (string->list s)))
 
@@ -3672,6 +3621,8 @@
                             (cons (char-ranges-union a-range b-range) (cdr b))
                             res))))))))
 
+(define (cset-adjoin cs ch) (cset-union cs (char->cset ch)))
+
 (define (next-char c)
   (integer->char (+ (char->integer c) 1)))
 
Trap