~ chicken-core (chicken-5) /irregex-utils.scm


  1;;;; irregex-utils.scm
  2;;
  3;; Copyright (c) 2010 Alex Shinn.  All rights reserved.
  4;; BSD-style license: http://synthcode.com/license.txt
  5
  6(define rx-special-chars
  7  "\\|[](){}.*+?^$#")
  8
  9(define (string-scan-char str c . o)
 10  (let ((end (string-length str)))
 11    (let scan ((i (if (pair? o) (car o) 0)))
 12      (cond ((= i end) #f)
 13            ((eqv? c (string-ref str i)) i)
 14            (else (scan (+ i 1)))))))
 15
 16(define (irregex-quote str)
 17  (list->string
 18   (let loop ((ls (string->list str)) (res '()))
 19     (if (null? ls)
 20         (reverse res)
 21         (let ((c (car ls)))
 22           (if (string-scan-char rx-special-chars c)
 23               (loop (cdr ls) (cons c (cons #\\ res)))
 24               (loop (cdr ls) (cons c res))))))))
 25
 26;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 27
 28(define (irregex-opt ls)
 29  (define (make-alt ls)
 30    (cond ((null? (cdr ls)) (car ls))
 31          ((every char? ls) (list (list->string ls)))
 32          (else (cons 'or ls))))
 33  (define (make-seq ls)
 34    (cond ((null? (cdr ls)) (car ls))
 35          ((every (lambda (x) (or (string? x) (char? x))) ls)
 36           (apply string-append (map (lambda (x) (if (char? x) (string x) x)) ls)))
 37          (else (cons 'seq ls))))
 38  (cond
 39   ((null? ls) "")
 40   ((null? (cdr ls)) (car ls))
 41   (else
 42    (let ((chars (make-vector 256 '())))
 43      (let lp1 ((ls ls) (empty? #f))
 44        (if (null? ls)
 45            (let lp2 ((i 0) (res '()))
 46              (if (= i 256)
 47                  (let ((res (make-alt (reverse res))))
 48                    (if empty? `(? ,res) res))
 49                  (let ((c (integer->char i))
 50                        (opts (vector-ref chars i)))
 51                    (lp2 (+ i 1)
 52                         (cond
 53                          ((null? opts) res)
 54                          ((equal? opts '("")) `(,c ,@res))
 55                          (else `(,(make-seq (list c (irregex-opt opts)))
 56                                  ,@res)))))))
 57            (let* ((str (car ls))
 58                   (len (string-length str)))
 59              (if (zero? len)
 60                  (lp1 (cdr ls) #t)
 61                  (let ((i (char->integer (string-ref str 0))))
 62                    (vector-set!
 63                     chars
 64                     i
 65                     (cons (substring str 1 len) (vector-ref chars i)))
 66                    (lp1 (cdr ls) empty?))))))))))
 67
 68;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 69
 70(define (cset->string ls)
 71  (let ((out (open-output-string)))
 72    (let lp ((ls ls))
 73      (cond
 74       ((pair? ls)
 75        (cond
 76         ((pair? (car ls))
 77          (display (irregex-quote (string (caar ls))) out)
 78          (write-char #\- out)
 79          (display (irregex-quote (string (cdar ls))) out))
 80         (else (display (irregex-quote (string (car ls))) out)))
 81        (lp (cdr ls)))))
 82    (get-output-string out)))
 83
 84(define (sre->string obj)
 85  (let ((out (open-output-string)))
 86    (let lp ((x obj))
 87      (cond
 88       ((pair? x)
 89        (case (car x)
 90          ((: seq)
 91           (cond
 92            ((and (pair? (cdr x)) (pair? (cddr x)) (not (eq? x obj)))
 93             (display "(?:" out) (for-each lp (cdr x)) (display ")" out))
 94            (else (for-each lp (cdr x)))))
 95          ((submatch)
 96           (display "(" out) (for-each lp (cdr x)) (display ")" out))
 97          ((submatch-named)
 98           (display "(?<" out) (display (cadr x) out) (display ">" out)
 99           (for-each lp (cddr x)) (display ")" out))
100          ((or)
101           (display "(?:" out)
102           (lp (cadr x))
103           (for-each (lambda (x) (display "|" out) (lp x)) (cddr x))
104           (display ")" out))
105          ((* + ? *? ??)
106           (cond
107            ((or (pair? (cddr x)) (and (string? (cadr x)) (not (= 1 (string-length (cadr x))))))
108             (display "(?:" out) (for-each lp (cdr x)) (display ")" out))
109            (else (lp (cadr x))))
110           (display (car x) out))
111          ((not)
112           (cond
113            ((and (pair? (cadr x)) (eq? 'cset (caadr x)))
114             (display "[^" out)
115             (display (cset->string (cdadr x)) out)
116             (display "]" out))
117            (else (error "can't represent general 'not' in strings" x))))
118          ((cset)
119           (display "[" out)
120           (display (cset->string (cdr x)) out)
121           (display "]" out))
122          ((- & / ~)
123           (cond
124            ((or (eqv? #\~ (car x))
125                 (and (eq? '- (car x)) (pair? (cdr x)) (eq? 'any (cadr x))))
126             (display "[^" out)
127             (display (cset->string (if (eqv? #\~ (car x)) (cdr x) (cddr x))) out)
128             (display "]" out))
129            (else
130             (lp `(cset ,@(sre->cset x))))))
131          ((w/case w/nocase)
132           (display "(?" out)
133           (if (eq? (car x) 'w/case) (display "-" out))
134           (display ":" out)
135           (for-each lp (cdr x))
136           (display ")" out))
137          (else
138           (if (string? (car x))
139               (lp `(cset ,@(string->list (car x))))
140               (error "unknown sre operator" x)))))
141       ((symbol? x)
142        (case x
143          ((bos bol) (display "^" out))
144          ((eos eol) (display "$" out))
145          ((any nonl) (display "." out))
146          (else (error "unknown sre symbol" x))))
147       ((string? x)
148        (display (irregex-quote x) out))
149       ((char? x)
150        (display (irregex-quote (string x)) out))
151       (else
152        (error "unknown sre pattern" x))))
153    (get-output-string out)))
154
Trap