~ chicken-core (chicken-5) cddf07cd73071acf7398c8e18b310a6c5418d609


commit cddf07cd73071acf7398c8e18b310a6c5418d609
Author:     felix <felix@call-with-current-continuation.org>
AuthorDate: Tue Jul 27 13:23:22 2010 +0200
Commit:     felix <felix@call-with-current-continuation.org>
CommitDate: Tue Jul 27 13:23:22 2010 +0200

    added irregex-utils; rebased to experimental

diff --git a/distribution/manifest b/distribution/manifest
index c0a3429a..d01a4201 100644
--- a/distribution/manifest
+++ b/distribution/manifest
@@ -79,6 +79,7 @@ scrutinizer.scm
 unboxing.scm
 irregex.scm
 irregex-core.scm
+irregex-utils.scm
 posixunix.scm
 posixwin.scm
 posix-common.scm
diff --git a/irregex-utils.scm b/irregex-utils.scm
new file mode 100644
index 00000000..8332791d
--- /dev/null
+++ b/irregex-utils.scm
@@ -0,0 +1,154 @@
+;;;; irregex-utils.scm
+;;
+;; Copyright (c) 2010 Alex Shinn.  All rights reserved.
+;; BSD-style license: http://synthcode.com/license.txt
+
+(define rx-special-chars
+  "\\|[](){}.*+?^$#")
+
+(define (string-scan-char str c . o)
+  (let ((end (string-length str)))
+    (let scan ((i (if (pair? o) (car o) 0)))
+      (cond ((= i end) #f)
+            ((eqv? c (string-ref str i)) i)
+            (else (scan (+ i 1)))))))
+
+(define (irregex-quote str)
+  (list->string
+   (let loop ((ls (string->list str)) (res '()))
+     (if (null? ls)
+         (reverse res)
+         (let ((c (car ls)))
+           (if (string-scan-char rx-special-chars c)
+               (loop (cdr ls) (cons c (cons #\\ res)))
+               (loop (cdr ls) (cons c res))))))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define (irregex-opt ls)
+  (define (make-alt ls)
+    (cond ((null? (cdr ls)) (car ls))
+          ((every char? ls) (list (list->string ls)))
+          (else (cons 'or ls))))
+  (define (make-seq ls)
+    (cond ((null? (cdr ls)) (car ls))
+          ((every (lambda (x) (or (string? x) (char? x))) ls)
+           (apply string-append (map (lambda (x) (if (char? x) (string x) x)) ls)))
+          (else (cons 'seq ls))))
+  (cond
+   ((null? ls) "")
+   ((null? (cdr ls)) (car ls))
+   (else
+    (let ((chars (make-vector 256 '())))
+      (let lp1 ((ls ls) (empty? #f))
+        (if (null? ls)
+            (let lp2 ((i 0) (res '()))
+              (if (= i 256)
+                  (let ((res (make-alt (reverse res))))
+                    (if empty? `(? ,res) res))
+                  (let ((c (integer->char i))
+                        (opts (vector-ref chars i)))
+                    (lp2 (+ i 1)
+                         (cond
+                          ((null? opts) res)
+                          ((equal? opts '("")) `(,c ,@res))
+                          (else `(,(make-seq (list c (irregex-opt opts)))
+                                  ,@res)))))))
+            (let* ((str (car ls))
+                   (len (string-length str)))
+              (if (zero? len)
+                  (lp1 (cdr ls) #t)
+                  (let ((i (char->integer (string-ref str 0))))
+                    (vector-set!
+                     chars
+                     i
+                     (cons (substring str 1 len) (vector-ref chars i)))
+                    (lp1 (cdr ls) empty?))))))))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define (cset->string ls)
+  (let ((out (open-output-string)))
+    (let lp ((ls ls))
+      (cond
+       ((pair? ls)
+        (cond
+         ((pair? (car ls))
+          (display (irregex-quote (string (caar ls))) out)
+          (write-char #\- out)
+          (display (irregex-quote (string (cdar ls))) out))
+         (else (display (irregex-quote (string (car ls))) out)))
+        (lp (cdr ls)))))
+    (get-output-string out)))
+
+(define (sre->string obj)
+  (let ((out (open-output-string)))
+    (let lp ((x obj))
+      (cond
+       ((pair? x)
+        (case (car x)
+          ((: seq)
+           (cond
+            ((and (pair? (cddr x)) (pair? (cddr x)) (not (eq? x obj)))
+             (display "(?:" out) (for-each lp (cdr x)) (display ")" out))
+            (else (for-each lp (cdr x)))))
+          ((submatch)
+           (display "(" out) (for-each lp (cdr x)) (display ")" out))
+          ((submatch-named)
+           (display "(?<" out) (display (cadr x) out) (display ">" out)
+           (for-each lp (cddr x)) (display ")" out))
+          ((or)
+           (display "(?:" out)
+           (lp (cadr x))
+           (for-each (lambda (x) (display "|" out) (lp x)) (cddr x))
+           (display ")" out))
+          ((* + ? *? ??)
+           (cond
+            ((pair? (cddr x))
+             (display "(?:" out) (for-each lp (cdr x)) (display ")" out))
+            (else (lp (cadr x))))
+           (display (car x) out))
+          ((not)
+           (cond
+            ((and (pair? (cadr x)) (eq? 'cset (caadr x)))
+             (display "[^" out)
+             (display (cset->string (cdadr x)) out)
+             (display "]" out))
+            (else (error "can't represent general 'not' in strings" x))))
+          ((cset)
+           (display "[" out)
+           (display (cset->string (cdr x)) out)
+           (display "]" out))
+          ((- & / ~)
+           (cond
+            ((or (eq? #\~ (car x))
+                 (and (eq? '- (car x)) (pair? (cdr x)) (eq? 'any (cadr x))))
+             (display "[^" out)
+             (display (cset->string (if (eq? #\~ (car x)) (cdr x) (cddr x))) out)
+             (display "]" out))
+            (else
+             (lp `(cset ,@(sre->cset x))))))
+          ((w/case w/nocase)
+           (display "(?" out)
+           (if (eq? (car x) 'w/case) (display "-" out))
+           (display ":" out)
+           (for-each lp (cdr x))
+           (display ")" out))
+          (else
+           (if (string? (car x))
+               (lp `(cset ,@(string->list (car x))))
+               (error "unknown sre operator" x)))))
+       ((symbol? x)
+        (case x
+          ((bos bol) (display "^" out))
+          ((eos eol) (display "$" out))
+          ((any nonl) (display "." out))
+          (else (error "unknown sre symbol" x))))
+       ((string? x)
+        (display (irregex-quote x) out))
+       ((char? x)
+        (display (irregex-quote (string x)) out))
+       (else
+        (error "unknown sre pattern" x))))
+    (get-output-string out)))
+
diff --git a/irregex.import.scm b/irregex.import.scm
index 53e001d0..9072848a 100644
--- a/irregex.import.scm
+++ b/irregex.import.scm
@@ -69,6 +69,7 @@
    make-irregex-chunker
    maybe-string->sre
    sre->irregex
+   sre->string
    string->irregex 
    string->sre
    ))
diff --git a/irregex.scm b/irregex.scm
index afa0c0ce..655d5554 100644
--- a/irregex.scm
+++ b/irregex.scm
@@ -76,6 +76,7 @@
    maybe-string->sre
    irregex-search/chunked
    sre->irregex
+   sre->string
    string->irregex
    string->sre
    ))
@@ -123,6 +124,7 @@
 			   ,(fold (add1 i))))))))
 
 (include "irregex-core.scm")
+(include "irregex-utils.scm")
 
 (define ##sys#glob->regexp
   (let ((list->string list->string)
diff --git a/rules.make b/rules.make
index ff843247..a2888e16 100644
--- a/rules.make
+++ b/rules.make
@@ -852,7 +852,7 @@ posixunix.c: $(SRCDIR)posixunix.scm $(SRCDIR)posix-common.scm $(SRCDIR)common-de
 	$(CHICKEN) $< $(CHICKEN_LIBRARY_OPTIONS) -output-file $@ 
 posixwin.c: $(SRCDIR)posixwin.scm $(SRCDIR)posix-common.scm $(SRCDIR)common-declarations.scm
 	$(CHICKEN) $< $(CHICKEN_LIBRARY_OPTIONS) -output-file $@ 
-irregex.c: $(SRCDIR)irregex.scm $(SRCDIR)irregex-core.scm $(SRCDIR)common-declarations.scm
+irregex.c: $(SRCDIR)irregex.scm $(SRCDIR)irregex-core.scm $(SRCDIR)irregex-utils.scm $(SRCDIR)common-declarations.scm
 	$(CHICKEN) $< $(CHICKEN_LIBRARY_OPTIONS) -output-file $@
 scheduler.c: $(SRCDIR)scheduler.scm $(SRCDIR)common-declarations.scm
 	$(CHICKEN) $< $(CHICKEN_LIBRARY_OPTIONS) -output-file $@ 
Trap