~ chicken-core (chicken-5) cddf07cd73071acf7398c8e18b310a6c5418d609
commit cddf07cd73071acf7398c8e18b310a6c5418d609
Author: felix <felix@call-with-current-continuation.org>
AuthorDate: Tue Jul 27 13:23:22 2010 +0200
Commit: felix <felix@call-with-current-continuation.org>
CommitDate: Tue Jul 27 13:23:22 2010 +0200
added irregex-utils; rebased to experimental
diff --git a/distribution/manifest b/distribution/manifest
index c0a3429a..d01a4201 100644
--- a/distribution/manifest
+++ b/distribution/manifest
@@ -79,6 +79,7 @@ scrutinizer.scm
unboxing.scm
irregex.scm
irregex-core.scm
+irregex-utils.scm
posixunix.scm
posixwin.scm
posix-common.scm
diff --git a/irregex-utils.scm b/irregex-utils.scm
new file mode 100644
index 00000000..8332791d
--- /dev/null
+++ b/irregex-utils.scm
@@ -0,0 +1,154 @@
+;;;; irregex-utils.scm
+;;
+;; Copyright (c) 2010 Alex Shinn. All rights reserved.
+;; BSD-style license: http://synthcode.com/license.txt
+
+(define rx-special-chars
+ "\\|[](){}.*+?^$#")
+
+(define (string-scan-char str c . o)
+ (let ((end (string-length str)))
+ (let scan ((i (if (pair? o) (car o) 0)))
+ (cond ((= i end) #f)
+ ((eqv? c (string-ref str i)) i)
+ (else (scan (+ i 1)))))))
+
+(define (irregex-quote str)
+ (list->string
+ (let loop ((ls (string->list str)) (res '()))
+ (if (null? ls)
+ (reverse res)
+ (let ((c (car ls)))
+ (if (string-scan-char rx-special-chars c)
+ (loop (cdr ls) (cons c (cons #\\ res)))
+ (loop (cdr ls) (cons c res))))))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define (irregex-opt ls)
+ (define (make-alt ls)
+ (cond ((null? (cdr ls)) (car ls))
+ ((every char? ls) (list (list->string ls)))
+ (else (cons 'or ls))))
+ (define (make-seq ls)
+ (cond ((null? (cdr ls)) (car ls))
+ ((every (lambda (x) (or (string? x) (char? x))) ls)
+ (apply string-append (map (lambda (x) (if (char? x) (string x) x)) ls)))
+ (else (cons 'seq ls))))
+ (cond
+ ((null? ls) "")
+ ((null? (cdr ls)) (car ls))
+ (else
+ (let ((chars (make-vector 256 '())))
+ (let lp1 ((ls ls) (empty? #f))
+ (if (null? ls)
+ (let lp2 ((i 0) (res '()))
+ (if (= i 256)
+ (let ((res (make-alt (reverse res))))
+ (if empty? `(? ,res) res))
+ (let ((c (integer->char i))
+ (opts (vector-ref chars i)))
+ (lp2 (+ i 1)
+ (cond
+ ((null? opts) res)
+ ((equal? opts '("")) `(,c ,@res))
+ (else `(,(make-seq (list c (irregex-opt opts)))
+ ,@res)))))))
+ (let* ((str (car ls))
+ (len (string-length str)))
+ (if (zero? len)
+ (lp1 (cdr ls) #t)
+ (let ((i (char->integer (string-ref str 0))))
+ (vector-set!
+ chars
+ i
+ (cons (substring str 1 len) (vector-ref chars i)))
+ (lp1 (cdr ls) empty?))))))))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define (cset->string ls)
+ (let ((out (open-output-string)))
+ (let lp ((ls ls))
+ (cond
+ ((pair? ls)
+ (cond
+ ((pair? (car ls))
+ (display (irregex-quote (string (caar ls))) out)
+ (write-char #\- out)
+ (display (irregex-quote (string (cdar ls))) out))
+ (else (display (irregex-quote (string (car ls))) out)))
+ (lp (cdr ls)))))
+ (get-output-string out)))
+
+(define (sre->string obj)
+ (let ((out (open-output-string)))
+ (let lp ((x obj))
+ (cond
+ ((pair? x)
+ (case (car x)
+ ((: seq)
+ (cond
+ ((and (pair? (cddr x)) (pair? (cddr x)) (not (eq? x obj)))
+ (display "(?:" out) (for-each lp (cdr x)) (display ")" out))
+ (else (for-each lp (cdr x)))))
+ ((submatch)
+ (display "(" out) (for-each lp (cdr x)) (display ")" out))
+ ((submatch-named)
+ (display "(?<" out) (display (cadr x) out) (display ">" out)
+ (for-each lp (cddr x)) (display ")" out))
+ ((or)
+ (display "(?:" out)
+ (lp (cadr x))
+ (for-each (lambda (x) (display "|" out) (lp x)) (cddr x))
+ (display ")" out))
+ ((* + ? *? ??)
+ (cond
+ ((pair? (cddr x))
+ (display "(?:" out) (for-each lp (cdr x)) (display ")" out))
+ (else (lp (cadr x))))
+ (display (car x) out))
+ ((not)
+ (cond
+ ((and (pair? (cadr x)) (eq? 'cset (caadr x)))
+ (display "[^" out)
+ (display (cset->string (cdadr x)) out)
+ (display "]" out))
+ (else (error "can't represent general 'not' in strings" x))))
+ ((cset)
+ (display "[" out)
+ (display (cset->string (cdr x)) out)
+ (display "]" out))
+ ((- & / ~)
+ (cond
+ ((or (eq? #\~ (car x))
+ (and (eq? '- (car x)) (pair? (cdr x)) (eq? 'any (cadr x))))
+ (display "[^" out)
+ (display (cset->string (if (eq? #\~ (car x)) (cdr x) (cddr x))) out)
+ (display "]" out))
+ (else
+ (lp `(cset ,@(sre->cset x))))))
+ ((w/case w/nocase)
+ (display "(?" out)
+ (if (eq? (car x) 'w/case) (display "-" out))
+ (display ":" out)
+ (for-each lp (cdr x))
+ (display ")" out))
+ (else
+ (if (string? (car x))
+ (lp `(cset ,@(string->list (car x))))
+ (error "unknown sre operator" x)))))
+ ((symbol? x)
+ (case x
+ ((bos bol) (display "^" out))
+ ((eos eol) (display "$" out))
+ ((any nonl) (display "." out))
+ (else (error "unknown sre symbol" x))))
+ ((string? x)
+ (display (irregex-quote x) out))
+ ((char? x)
+ (display (irregex-quote (string x)) out))
+ (else
+ (error "unknown sre pattern" x))))
+ (get-output-string out)))
+
diff --git a/irregex.import.scm b/irregex.import.scm
index 53e001d0..9072848a 100644
--- a/irregex.import.scm
+++ b/irregex.import.scm
@@ -69,6 +69,7 @@
make-irregex-chunker
maybe-string->sre
sre->irregex
+ sre->string
string->irregex
string->sre
))
diff --git a/irregex.scm b/irregex.scm
index afa0c0ce..655d5554 100644
--- a/irregex.scm
+++ b/irregex.scm
@@ -76,6 +76,7 @@
maybe-string->sre
irregex-search/chunked
sre->irregex
+ sre->string
string->irregex
string->sre
))
@@ -123,6 +124,7 @@
,(fold (add1 i))))))))
(include "irregex-core.scm")
+(include "irregex-utils.scm")
(define ##sys#glob->regexp
(let ((list->string list->string)
diff --git a/rules.make b/rules.make
index ff843247..a2888e16 100644
--- a/rules.make
+++ b/rules.make
@@ -852,7 +852,7 @@ posixunix.c: $(SRCDIR)posixunix.scm $(SRCDIR)posix-common.scm $(SRCDIR)common-de
$(CHICKEN) $< $(CHICKEN_LIBRARY_OPTIONS) -output-file $@
posixwin.c: $(SRCDIR)posixwin.scm $(SRCDIR)posix-common.scm $(SRCDIR)common-declarations.scm
$(CHICKEN) $< $(CHICKEN_LIBRARY_OPTIONS) -output-file $@
-irregex.c: $(SRCDIR)irregex.scm $(SRCDIR)irregex-core.scm $(SRCDIR)common-declarations.scm
+irregex.c: $(SRCDIR)irregex.scm $(SRCDIR)irregex-core.scm $(SRCDIR)irregex-utils.scm $(SRCDIR)common-declarations.scm
$(CHICKEN) $< $(CHICKEN_LIBRARY_OPTIONS) -output-file $@
scheduler.c: $(SRCDIR)scheduler.scm $(SRCDIR)common-declarations.scm
$(CHICKEN) $< $(CHICKEN_LIBRARY_OPTIONS) -output-file $@
Trap