~ chicken-core (chicken-5) cddf07cd73071acf7398c8e18b310a6c5418d609
commit cddf07cd73071acf7398c8e18b310a6c5418d609 Author: felix <felix@call-with-current-continuation.org> AuthorDate: Tue Jul 27 13:23:22 2010 +0200 Commit: felix <felix@call-with-current-continuation.org> CommitDate: Tue Jul 27 13:23:22 2010 +0200 added irregex-utils; rebased to experimental diff --git a/distribution/manifest b/distribution/manifest index c0a3429a..d01a4201 100644 --- a/distribution/manifest +++ b/distribution/manifest @@ -79,6 +79,7 @@ scrutinizer.scm unboxing.scm irregex.scm irregex-core.scm +irregex-utils.scm posixunix.scm posixwin.scm posix-common.scm diff --git a/irregex-utils.scm b/irregex-utils.scm new file mode 100644 index 00000000..8332791d --- /dev/null +++ b/irregex-utils.scm @@ -0,0 +1,154 @@ +;;;; irregex-utils.scm +;; +;; Copyright (c) 2010 Alex Shinn. All rights reserved. +;; BSD-style license: http://synthcode.com/license.txt + +(define rx-special-chars + "\\|[](){}.*+?^$#") + +(define (string-scan-char str c . o) + (let ((end (string-length str))) + (let scan ((i (if (pair? o) (car o) 0))) + (cond ((= i end) #f) + ((eqv? c (string-ref str i)) i) + (else (scan (+ i 1))))))) + +(define (irregex-quote str) + (list->string + (let loop ((ls (string->list str)) (res '())) + (if (null? ls) + (reverse res) + (let ((c (car ls))) + (if (string-scan-char rx-special-chars c) + (loop (cdr ls) (cons c (cons #\\ res))) + (loop (cdr ls) (cons c res)))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (irregex-opt ls) + (define (make-alt ls) + (cond ((null? (cdr ls)) (car ls)) + ((every char? ls) (list (list->string ls))) + (else (cons 'or ls)))) + (define (make-seq ls) + (cond ((null? (cdr ls)) (car ls)) + ((every (lambda (x) (or (string? x) (char? x))) ls) + (apply string-append (map (lambda (x) (if (char? x) (string x) x)) ls))) + (else (cons 'seq ls)))) + (cond + ((null? ls) "") + ((null? (cdr ls)) (car ls)) + (else + (let ((chars (make-vector 256 '()))) + (let lp1 ((ls ls) (empty? #f)) + (if (null? ls) + (let lp2 ((i 0) (res '())) + (if (= i 256) + (let ((res (make-alt (reverse res)))) + (if empty? `(? ,res) res)) + (let ((c (integer->char i)) + (opts (vector-ref chars i))) + (lp2 (+ i 1) + (cond + ((null? opts) res) + ((equal? opts '("")) `(,c ,@res)) + (else `(,(make-seq (list c (irregex-opt opts))) + ,@res))))))) + (let* ((str (car ls)) + (len (string-length str))) + (if (zero? len) + (lp1 (cdr ls) #t) + (let ((i (char->integer (string-ref str 0)))) + (vector-set! + chars + i + (cons (substring str 1 len) (vector-ref chars i))) + (lp1 (cdr ls) empty?)))))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (cset->string ls) + (let ((out (open-output-string))) + (let lp ((ls ls)) + (cond + ((pair? ls) + (cond + ((pair? (car ls)) + (display (irregex-quote (string (caar ls))) out) + (write-char #\- out) + (display (irregex-quote (string (cdar ls))) out)) + (else (display (irregex-quote (string (car ls))) out))) + (lp (cdr ls))))) + (get-output-string out))) + +(define (sre->string obj) + (let ((out (open-output-string))) + (let lp ((x obj)) + (cond + ((pair? x) + (case (car x) + ((: seq) + (cond + ((and (pair? (cddr x)) (pair? (cddr x)) (not (eq? x obj))) + (display "(?:" out) (for-each lp (cdr x)) (display ")" out)) + (else (for-each lp (cdr x))))) + ((submatch) + (display "(" out) (for-each lp (cdr x)) (display ")" out)) + ((submatch-named) + (display "(?<" out) (display (cadr x) out) (display ">" out) + (for-each lp (cddr x)) (display ")" out)) + ((or) + (display "(?:" out) + (lp (cadr x)) + (for-each (lambda (x) (display "|" out) (lp x)) (cddr x)) + (display ")" out)) + ((* + ? *? ??) + (cond + ((pair? (cddr x)) + (display "(?:" out) (for-each lp (cdr x)) (display ")" out)) + (else (lp (cadr x)))) + (display (car x) out)) + ((not) + (cond + ((and (pair? (cadr x)) (eq? 'cset (caadr x))) + (display "[^" out) + (display (cset->string (cdadr x)) out) + (display "]" out)) + (else (error "can't represent general 'not' in strings" x)))) + ((cset) + (display "[" out) + (display (cset->string (cdr x)) out) + (display "]" out)) + ((- & / ~) + (cond + ((or (eq? #\~ (car x)) + (and (eq? '- (car x)) (pair? (cdr x)) (eq? 'any (cadr x)))) + (display "[^" out) + (display (cset->string (if (eq? #\~ (car x)) (cdr x) (cddr x))) out) + (display "]" out)) + (else + (lp `(cset ,@(sre->cset x)))))) + ((w/case w/nocase) + (display "(?" out) + (if (eq? (car x) 'w/case) (display "-" out)) + (display ":" out) + (for-each lp (cdr x)) + (display ")" out)) + (else + (if (string? (car x)) + (lp `(cset ,@(string->list (car x)))) + (error "unknown sre operator" x))))) + ((symbol? x) + (case x + ((bos bol) (display "^" out)) + ((eos eol) (display "$" out)) + ((any nonl) (display "." out)) + (else (error "unknown sre symbol" x)))) + ((string? x) + (display (irregex-quote x) out)) + ((char? x) + (display (irregex-quote (string x)) out)) + (else + (error "unknown sre pattern" x)))) + (get-output-string out))) + diff --git a/irregex.import.scm b/irregex.import.scm index 53e001d0..9072848a 100644 --- a/irregex.import.scm +++ b/irregex.import.scm @@ -69,6 +69,7 @@ make-irregex-chunker maybe-string->sre sre->irregex + sre->string string->irregex string->sre )) diff --git a/irregex.scm b/irregex.scm index afa0c0ce..655d5554 100644 --- a/irregex.scm +++ b/irregex.scm @@ -76,6 +76,7 @@ maybe-string->sre irregex-search/chunked sre->irregex + sre->string string->irregex string->sre )) @@ -123,6 +124,7 @@ ,(fold (add1 i)))))))) (include "irregex-core.scm") +(include "irregex-utils.scm") (define ##sys#glob->regexp (let ((list->string list->string) diff --git a/rules.make b/rules.make index ff843247..a2888e16 100644 --- a/rules.make +++ b/rules.make @@ -852,7 +852,7 @@ posixunix.c: $(SRCDIR)posixunix.scm $(SRCDIR)posix-common.scm $(SRCDIR)common-de $(CHICKEN) $< $(CHICKEN_LIBRARY_OPTIONS) -output-file $@ posixwin.c: $(SRCDIR)posixwin.scm $(SRCDIR)posix-common.scm $(SRCDIR)common-declarations.scm $(CHICKEN) $< $(CHICKEN_LIBRARY_OPTIONS) -output-file $@ -irregex.c: $(SRCDIR)irregex.scm $(SRCDIR)irregex-core.scm $(SRCDIR)common-declarations.scm +irregex.c: $(SRCDIR)irregex.scm $(SRCDIR)irregex-core.scm $(SRCDIR)irregex-utils.scm $(SRCDIR)common-declarations.scm $(CHICKEN) $< $(CHICKEN_LIBRARY_OPTIONS) -output-file $@ scheduler.c: $(SRCDIR)scheduler.scm $(SRCDIR)common-declarations.scm $(CHICKEN) $< $(CHICKEN_LIBRARY_OPTIONS) -output-file $@Trap