~ chicken-core (chicken-5) /irregex.scm
Trap1;;;; irregex.scm - container for irregex-core.scm
2;
3; Copyright (c) 2010-2022, The CHICKEN Team
4; All rights reserved.
5;
6; Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following
7; conditions are met:
8;
9; Redistributions of source code must retain the above copyright notice, this list of conditions and the following
10; disclaimer.
11; Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
12; disclaimer in the documentation and/or other materials provided with the distribution.
13; Neither the name of the author nor the names of its contributors may be used to endorse or promote
14; products derived from this software without specific prior written permission.
15;
16; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
17; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
18; AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
19; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24; POSSIBILITY OF SUCH DAMAGE.
25
26
27
28(declare
29 (unit irregex)
30 (no-procedure-checks)
31 (fixnum))
32
33(module chicken.irregex
34 (;; Constructors, accessors and predicates
35 irregex irregex? string->sre maybe-string->sre sre->irregex
36 irregex-names irregex-num-submatches string->irregex
37
38 ;; Chunking constructor
39 make-irregex-chunker
40
41 ;; Main API
42 irregex-extract irregex-fold irregex-match irregex-match?
43 irregex-search irregex-split irregex-replace irregex-replace/all
44
45 ;; Chunked main API
46 irregex-fold/chunked irregex-match/chunked irregex-search/chunked
47
48 ;; Match extraction API
49 irregex-match-data? irregex-match-names
50 irregex-match-start-index irregex-match-end-index
51 irregex-match-num-submatches irregex-match-substring
52 irregex-match-valid-index?
53
54 ;; Chunked match API
55 irregex-match-start-chunk irregex-match-end-chunk
56 irregex-match-subchunk
57
58 ;; Utilities
59 glob->sre sre->string irregex-opt irregex-quote)
60
61
62(import scheme chicken.base chicken.fixnum chicken.syntax chicken.type)
63
64(import-for-syntax chicken.fixnum)
65
66(include "common-declarations.scm")
67
68;; These should probably be taken out of irregex upstream
69(declare (unused filter integer-log cset-size remove))
70
71;; Due to usual-integrations, find is the one from library.scm,
72;; so find-tail is unused (it's only used in the "find" definition)
73(declare (unused find-tail))
74
75;; This is to silence an "always true" warning that we can't fix
76;; because we don't want to needlessly change irregex-core.
77(declare (type (*allow-utf8-mode?* boolean)))
78
79(define-syntax build-cache
80 (er-macro-transformer
81 (lambda (x r c)
82 ;; (build-cache N ARG FAIL)
83 (let* ((n (cadr x))
84 (n2 (* n 2))
85 (arg (caddr x))
86 (fail (cadddr x))
87 (%cache (r 'cache))
88 (%index (r 'index))
89 (%arg (r 'arg))
90 (%let (r 'let))
91 (%let* (r 'let*))
92 (%if (r 'if))
93 (%fx+ (r 'fx+))
94 (%fxmod (r 'fxmod))
95 (%equal? (r 'equal?))
96 (%quote (r 'quote))
97 (%tmp (r 'tmp))
98 (%begin (r 'begin))
99 (cache (make-vector (add1 n2) #f)))
100 (##sys#setslot cache n2 0) ; last slot: current index
101 `(,%let* ((,%cache (,%quote ,cache)) ; we mutate a literal vector
102 (,%arg ,arg))
103 ,(let fold ((i 0))
104 (if (fx>= i n)
105 ;; this should be thread-safe: a context-switch can only
106 ;; happen before this code and in the call to FAIL.
107 `(,%let ((,%tmp ,fail)
108 (,%index (##sys#slot ,%cache ,n2)))
109 (##sys#setslot ,%cache ,%index ,%arg)
110 (##sys#setslot ,%cache (,%fx+ ,%index 1) ,%tmp)
111 (##sys#setislot
112 ,%cache ,n2 (,%fxmod (,%fx+ ,%index 2) ,n2))
113 ,%tmp)
114 `(,%if (,%equal? (##sys#slot ,%cache ,(* i 2)) ,%arg)
115 (##sys#slot ,%cache ,(add1 (* i 2)))
116 ,(fold (add1 i))))))))))
117
118(declare (unused %%string-copy!))
119(define-compiler-syntax %%string-copy!
120 (syntax-rules ()
121 ((_ to tstart from fstart fend)
122 (let ((x to)
123 (y tstart)
124 (z from)
125 (u fstart)
126 (v fend))
127 (##core#inline "C_substring_copy" z x u v y)))))
128
129(declare (unused %substring=?))
130(define-compiler-syntax %substring=?
131 (syntax-rules ()
132 ((_ a b start1 start2 len)
133 (##core#inline "C_substring_compare" a b start1 start2 len))))
134
135(define-compiler-syntax make-irregex
136 (syntax-rules ()
137 ((_ dfa dfa/search nfa flags submatches lengths names)
138 (##sys#make-structure
139 'regexp dfa dfa/search nfa flags submatches lengths names))))
140
141(define-compiler-syntax make-irregex-match
142 (syntax-rules ()
143 ((_ count names)
144 (##sys#make-structure
145 'regexp-match
146 (make-vector (* 4 (+ 2 count)) #f) ; #1: submatches
147 names ; #2: (guess)
148 #f ; #3: chunka
149 #f)))) ; #4: fail
150
151(declare (unused reverse))
152(define-compiler-syntax reverse
153 (syntax-rules ()
154 ((_ lst) (##sys#fast-reverse lst))))
155
156(declare (unused bit-shl))
157(define-compiler-syntax bit-shl
158 (syntax-rules ()
159 ((_ n i) (fxshl n i))))
160
161(declare (unused bit-shr))
162(define-compiler-syntax bit-shr
163 (syntax-rules ()
164 ((_ n i) (fxshr n i))))
165
166(declare (unused bit-not))
167(define-compiler-syntax bit-not
168 (syntax-rules ()
169 ((_ n) (fxnot n))))
170
171(declare (unused bit-ior))
172(define-compiler-syntax bit-ior
173 (syntax-rules ()
174 ((_ a b) (fxior a b))))
175
176(declare (unused bit-and))
177(define-compiler-syntax bit-and
178 (syntax-rules ()
179 ((_ a b) (fxand a b))))
180
181(define-compiler-syntax match-vector-ref
182 (syntax-rules ()
183 ((_ m i) (##sys#slot (##sys#slot m 1) i))))
184
185(define-compiler-syntax match-vector-set!
186 (syntax-rules ()
187 ((_ m i x) (##sys#setslot (##sys#slot m 1) i x))))
188
189(declare (unused irregex-match-start-chunk-set!))
190(define-compiler-syntax irregex-match-start-chunk-set!
191 (syntax-rules ()
192 ((_ m n start)
193 (vector-set! (##sys#slot m 1) (* n 4) start))))
194
195(declare (unused irregex-match-start-index-set!))
196(define-compiler-syntax irregex-match-start-index-set!
197 (syntax-rules ()
198 ((_ m n start)
199 (vector-set! (##sys#slot m 1) (+ 1 (* n 4)) start))))
200
201(declare (unused irregex-match-end-chunk-set!))
202(define-compiler-syntax irregex-match-end-chunk-set!
203 (syntax-rules ()
204 ((_ m n end)
205 (vector-set! (##sys#slot m 1) (+ 2 (* n 4)) end))))
206
207(declare (unused irregex-match-end-index-set!))
208(define-compiler-syntax irregex-match-end-index-set!
209 (syntax-rules ()
210 ((_ m n end)
211 (vector-set! (##sys#slot m 1) (+ 3 (* n 4)) end))))
212
213(declare (unused irregex-match-chunk&index-from-tag-set!))
214(define-compiler-syntax irregex-match-chunk&index-from-tag-set!
215 (syntax-rules ()
216 ((_ m t chunk index)
217 (begin
218 (vector-set! (##sys#slot m 1) (+ 4 (* t 2)) chunk)
219 (vector-set! (##sys#slot m 1) (+ 5 (* t 2)) index)))))
220
221(include "irregex-core.scm")
222(include "irregex-utils.scm")
223
224(define glob->sre
225 (let ((list->string list->string)
226 (string->list string->list))
227 (lambda (s)
228 (##sys#check-string s 'glob->sre)
229 (cons
230 ':
231 (let loop ((cs (string->list s)) (dir #t))
232 (if (null? cs)
233 '()
234 (let ((c (car cs))
235 (rest (cdr cs)) )
236 (cond ((char=? c #\*)
237 (if dir
238 `((or (: (~ ("./\\"))
239 (* (~ ("/\\"))))
240 (* (~ ("./\\"))))
241 ,@(loop rest #f))
242 `((* (~ ("/\\"))) ,@(loop rest #f))))
243 ((char=? c #\?) (cons 'any (loop rest #f)))
244 ((char=? c #\[)
245 (let loop2 ((rest rest) (s '()))
246 (cond ((not (pair? rest))
247 (error 'glob->sre
248 "unexpected end of character class" s))
249 ((char=? #\] (car rest))
250 `(,(if (> (length s) 1)
251 `(or ,@s)
252 (car s))
253 ,@(loop (cdr rest) #f)))
254 ((and (pair? (cdr rest))
255 (pair? (cddr rest))
256 (char=? #\- (cadr rest)) )
257 (loop2 (cdddr rest)
258 (cons `(/ ,(car rest) ,(caddr rest)) s)))
259 ((and (pair? (cdr rest))
260 (char=? #\- (car rest)))
261 (loop2 (cddr rest)
262 (cons `(~ ,(cadr rest)) s)))
263 (else
264 (loop2 (cdr rest) (cons (car rest) s))))))
265 (else (cons c (loop rest (memq c '(#\\ #\/)))))))))))))
266
267)