~ chicken-core (chicken-5) /irregex-utils.scm
Trap1;;;; irregex-utils.scm2;;3;; Copyright (c) 2010 Alex Shinn. All rights reserved.4;; BSD-style license: http://synthcode.com/license.txt56(define rx-special-chars7 "\\|[](){}.*+?^$#")89(define (string-scan-char str c . o)10 (let ((end (string-length str)))11 (let scan ((i (if (pair? o) (car o) 0)))12 (cond ((= i end) #f)13 ((eqv? c (string-ref str i)) i)14 (else (scan (+ i 1)))))))1516(define (irregex-quote str)17 (list->string18 (let loop ((ls (string->list str)) (res '()))19 (if (null? ls)20 (reverse res)21 (let ((c (car ls)))22 (if (string-scan-char rx-special-chars c)23 (loop (cdr ls) (cons c (cons #\\ res)))24 (loop (cdr ls) (cons c res))))))))2526;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;2728(define (irregex-opt ls)29 (define (make-alt ls)30 (cond ((null? (cdr ls)) (car ls))31 ((every char? ls) (list (list->string ls)))32 (else (cons 'or ls))))33 (define (make-seq ls)34 (cond ((null? (cdr ls)) (car ls))35 ((every (lambda (x) (or (string? x) (char? x))) ls)36 (apply string-append (map (lambda (x) (if (char? x) (string x) x)) ls)))37 (else (cons 'seq ls))))38 (cond39 ((null? ls) "")40 ((null? (cdr ls)) (car ls))41 (else42 (let ((chars (make-vector 256 '())))43 (let lp1 ((ls ls) (empty? #f))44 (if (null? ls)45 (let lp2 ((i 0) (res '()))46 (if (= i 256)47 (let ((res (make-alt (reverse res))))48 (if empty? `(? ,res) res))49 (let ((c (integer->char i))50 (opts (vector-ref chars i)))51 (lp2 (+ i 1)52 (cond53 ((null? opts) res)54 ((equal? opts '("")) `(,c ,@res))55 (else `(,(make-seq (list c (irregex-opt opts)))56 ,@res)))))))57 (let* ((str (car ls))58 (len (string-length str)))59 (if (zero? len)60 (lp1 (cdr ls) #t)61 (let ((i (char->integer (string-ref str 0))))62 (vector-set!63 chars64 i65 (cons (substring str 1 len) (vector-ref chars i)))66 (lp1 (cdr ls) empty?))))))))))6768;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;6970(define (cset->string ls)71 (let ((out (open-output-string)))72 (let lp ((ls ls))73 (cond74 ((pair? ls)75 (cond76 ((pair? (car ls))77 (display (irregex-quote (string (caar ls))) out)78 (write-char #\- out)79 (display (irregex-quote (string (cdar ls))) out))80 (else (display (irregex-quote (string (car ls))) out)))81 (lp (cdr ls)))))82 (get-output-string out)))8384(define (sre->string obj)85 (let ((out (open-output-string)))86 (let lp ((x obj))87 (cond88 ((pair? x)89 (case (car x)90 ((: seq)91 (cond92 ((and (pair? (cdr x)) (pair? (cddr x)) (not (eq? x obj)))93 (display "(?:" out) (for-each lp (cdr x)) (display ")" out))94 (else (for-each lp (cdr x)))))95 ((submatch)96 (display "(" out) (for-each lp (cdr x)) (display ")" out))97 ((submatch-named)98 (display "(?<" out) (display (cadr x) out) (display ">" out)99 (for-each lp (cddr x)) (display ")" out))100 ((or)101 (display "(?:" out)102 (lp (cadr x))103 (for-each (lambda (x) (display "|" out) (lp x)) (cddr x))104 (display ")" out))105 ((* + ? *? ??)106 (cond107 ((or (pair? (cddr x)) (and (string? (cadr x)) (not (= 1 (string-length (cadr x))))))108 (display "(?:" out) (for-each lp (cdr x)) (display ")" out))109 (else (lp (cadr x))))110 (display (car x) out))111 ((not)112 (cond113 ((and (pair? (cadr x)) (eq? 'cset (caadr x)))114 (display "[^" out)115 (display (cset->string (cdadr x)) out)116 (display "]" out))117 (else (error "can't represent general 'not' in strings" x))))118 ((cset)119 (display "[" out)120 (display (cset->string (cdr x)) out)121 (display "]" out))122 ((- & / ~)123 (cond124 ((or (eqv? #\~ (car x))125 (and (eq? '- (car x)) (pair? (cdr x)) (eq? 'any (cadr x))))126 (display "[^" out)127 (display (cset->string (if (eqv? #\~ (car x)) (cdr x) (cddr x))) out)128 (display "]" out))129 (else130 (lp `(cset ,@(sre->cset x))))))131 ((w/case w/nocase)132 (display "(?" out)133 (if (eq? (car x) 'w/case) (display "-" out))134 (display ":" out)135 (for-each lp (cdr x))136 (display ")" out))137 (else138 (if (string? (car x))139 (lp `(cset ,@(string->list (car x))))140 (error "unknown sre operator" x)))))141 ((symbol? x)142 (case x143 ((bos bol) (display "^" out))144 ((eos eol) (display "$" out))145 ((any nonl) (display "." out))146 (else (error "unknown sre symbol" x))))147 ((string? x)148 (display (irregex-quote x) out))149 ((char? x)150 (display (irregex-quote (string x)) out))151 (else152 (error "unknown sre pattern" x))))153 (get-output-string out)))154