~ chicken-core (chicken-5) /irregex-utils.scm
Trap1;;;; irregex-utils.scm
2;;
3;; Copyright (c) 2010 Alex Shinn. All rights reserved.
4;; BSD-style license: http://synthcode.com/license.txt
5
6(define rx-special-chars
7 "\\|[](){}.*+?^$#")
8
9(define (string-scan-char str c . o)
10 (let ((end (string-length str)))
11 (let scan ((i (if (pair? o) (car o) 0)))
12 (cond ((= i end) #f)
13 ((eqv? c (string-ref str i)) i)
14 (else (scan (+ i 1)))))))
15
16(define (irregex-quote str)
17 (list->string
18 (let loop ((ls (string->list str)) (res '()))
19 (if (null? ls)
20 (reverse res)
21 (let ((c (car ls)))
22 (if (string-scan-char rx-special-chars c)
23 (loop (cdr ls) (cons c (cons #\\ res)))
24 (loop (cdr ls) (cons c res))))))))
25
26;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27
28(define (irregex-opt ls)
29 (define (make-alt ls)
30 (cond ((null? (cdr ls)) (car ls))
31 ((every char? ls) (list (list->string ls)))
32 (else (cons 'or ls))))
33 (define (make-seq ls)
34 (cond ((null? (cdr ls)) (car ls))
35 ((every (lambda (x) (or (string? x) (char? x))) ls)
36 (apply string-append (map (lambda (x) (if (char? x) (string x) x)) ls)))
37 (else (cons 'seq ls))))
38 (cond
39 ((null? ls) "")
40 ((null? (cdr ls)) (car ls))
41 (else
42 (let ((chars (make-vector 256 '())))
43 (let lp1 ((ls ls) (empty? #f))
44 (if (null? ls)
45 (let lp2 ((i 0) (res '()))
46 (if (= i 256)
47 (let ((res (make-alt (reverse res))))
48 (if empty? `(? ,res) res))
49 (let ((c (integer->char i))
50 (opts (vector-ref chars i)))
51 (lp2 (+ i 1)
52 (cond
53 ((null? opts) res)
54 ((equal? opts '("")) `(,c ,@res))
55 (else `(,(make-seq (list c (irregex-opt opts)))
56 ,@res)))))))
57 (let* ((str (car ls))
58 (len (string-length str)))
59 (if (zero? len)
60 (lp1 (cdr ls) #t)
61 (let ((i (char->integer (string-ref str 0))))
62 (vector-set!
63 chars
64 i
65 (cons (substring str 1 len) (vector-ref chars i)))
66 (lp1 (cdr ls) empty?))))))))))
67
68;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
69
70(define (cset->string ls)
71 (let ((out (open-output-string)))
72 (let lp ((ls ls))
73 (cond
74 ((pair? ls)
75 (cond
76 ((pair? (car ls))
77 (display (irregex-quote (string (caar ls))) out)
78 (write-char #\- out)
79 (display (irregex-quote (string (cdar ls))) out))
80 (else (display (irregex-quote (string (car ls))) out)))
81 (lp (cdr ls)))))
82 (get-output-string out)))
83
84(define (sre->string obj)
85 (let ((out (open-output-string)))
86 (let lp ((x obj))
87 (cond
88 ((pair? x)
89 (case (car x)
90 ((: seq)
91 (cond
92 ((and (pair? (cdr x)) (pair? (cddr x)) (not (eq? x obj)))
93 (display "(?:" out) (for-each lp (cdr x)) (display ")" out))
94 (else (for-each lp (cdr x)))))
95 ((submatch)
96 (display "(" out) (for-each lp (cdr x)) (display ")" out))
97 ((submatch-named)
98 (display "(?<" out) (display (cadr x) out) (display ">" out)
99 (for-each lp (cddr x)) (display ")" out))
100 ((or)
101 (display "(?:" out)
102 (lp (cadr x))
103 (for-each (lambda (x) (display "|" out) (lp x)) (cddr x))
104 (display ")" out))
105 ((* + ? *? ??)
106 (cond
107 ((or (pair? (cddr x)) (and (string? (cadr x)) (not (= 1 (string-length (cadr x))))))
108 (display "(?:" out) (for-each lp (cdr x)) (display ")" out))
109 (else (lp (cadr x))))
110 (display (car x) out))
111 ((not)
112 (cond
113 ((and (pair? (cadr x)) (eq? 'cset (caadr x)))
114 (display "[^" out)
115 (display (cset->string (cdadr x)) out)
116 (display "]" out))
117 (else (error "can't represent general 'not' in strings" x))))
118 ((cset)
119 (display "[" out)
120 (display (cset->string (cdr x)) out)
121 (display "]" out))
122 ((- & / ~)
123 (cond
124 ((or (eqv? #\~ (car x))
125 (and (eq? '- (car x)) (pair? (cdr x)) (eq? 'any (cadr x))))
126 (display "[^" out)
127 (display (cset->string (if (eqv? #\~ (car x)) (cdr x) (cddr x))) out)
128 (display "]" out))
129 (else
130 (lp `(cset ,@(sre->cset x))))))
131 ((w/case w/nocase)
132 (display "(?" out)
133 (if (eq? (car x) 'w/case) (display "-" out))
134 (display ":" out)
135 (for-each lp (cdr x))
136 (display ")" out))
137 (else
138 (if (string? (car x))
139 (lp `(cset ,@(string->list (car x))))
140 (error "unknown sre operator" x)))))
141 ((symbol? x)
142 (case x
143 ((bos bol) (display "^" out))
144 ((eos eol) (display "$" out))
145 ((any nonl) (display "." out))
146 (else (error "unknown sre symbol" x))))
147 ((string? x)
148 (display (irregex-quote x) out))
149 ((char? x)
150 (display (irregex-quote (string x)) out))
151 (else
152 (error "unknown sre pattern" x))))
153 (get-output-string out)))
154