~ chicken-core (master) /irregex.scm
Trap1;;;; irregex.scm - container for irregex-core.scm
2;
3; Copyright (c) 2010-2022, The CHICKEN Team
4; All rights reserved.
5;
6; Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following
7; conditions are met:
8;
9; Redistributions of source code must retain the above copyright notice, this list of conditions and the following
10; disclaimer.
11; Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
12; disclaimer in the documentation and/or other materials provided with the distribution.
13; Neither the name of the author nor the names of its contributors may be used to endorse or promote
14; products derived from this software without specific prior written permission.
15;
16; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
17; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
18; AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
19; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24; POSSIBILITY OF SUCH DAMAGE.
25
26
27
28(declare
29 (unit irregex)
30 (no-procedure-checks)
31 (fixnum))
32
33(module chicken.irregex
34 (;; Constructors, accessors and predicates
35 irregex irregex? string->sre maybe-string->sre sre->irregex
36 irregex-names irregex-num-submatches string->irregex
37
38 ;; Chunking constructor
39 make-irregex-chunker
40
41 ;; Main API
42 irregex-extract irregex-fold irregex-match irregex-match?
43 irregex-search irregex-split irregex-replace irregex-replace/all
44
45 ;; Chunked main API
46 irregex-fold/chunked irregex-match/chunked irregex-search/chunked
47
48 ;; Match extraction API
49 irregex-match-data? irregex-match-names
50 irregex-match-start-index irregex-match-end-index
51 irregex-match-num-submatches irregex-match-substring
52 irregex-match-valid-index?
53
54 ;; Chunked match API
55 irregex-match-start-chunk irregex-match-end-chunk
56 irregex-match-subchunk
57
58 ;; Utilities
59 glob->sre sre->string irregex-opt irregex-quote)
60
61
62(import scheme chicken.base chicken.fixnum chicken.syntax chicken.type)
63(import (only (scheme base) open-output-string get-output-string))
64
65(import-for-syntax chicken.fixnum)
66
67(include "common-declarations.scm")
68
69;; These should probably be taken out of irregex upstream
70(declare (unused filter integer-log cset-size remove))
71
72;; Due to usual-integrations, find is the one from library.scm,
73;; so find-tail is unused (it's only used in the "find" definition)
74(declare (unused find-tail))
75
76;; This is to silence an "always true" warning that we can't fix
77;; because we don't want to needlessly change irregex-core.
78(declare (type (*allow-utf8-mode?* boolean)))
79
80(define-syntax build-cache
81 (er-macro-transformer
82 (lambda (x r c)
83 ;; (build-cache N ARG FAIL)
84 (let* ((n (cadr x))
85 (n2 (* n 2))
86 (arg (caddr x))
87 (fail (cadddr x))
88 (%cache (r 'cache))
89 (%index (r 'index))
90 (%arg (r 'arg))
91 (%let (r 'let))
92 (%let* (r 'let*))
93 (%if (r 'if))
94 (%fx+ (r 'fx+))
95 (%fxmod (r 'fxmod))
96 (%equal? (r 'equal?))
97 (%quote (r 'quote))
98 (%tmp (r 'tmp))
99 (%begin (r 'begin))
100 (cache (make-vector (add1 n2) #f)))
101 (##sys#setslot cache n2 0) ; last slot: current index
102 `(,%let* ((,%cache (,%quote ,cache)) ; we mutate a literal vector
103 (,%arg ,arg))
104 ,(let fold ((i 0))
105 (if (fx>= i n)
106 ;; this should be thread-safe: a context-switch can only
107 ;; happen before this code and in the call to FAIL.
108 `(,%let ((,%tmp ,fail)
109 (,%index (##sys#slot ,%cache ,n2)))
110 (##sys#setslot ,%cache ,%index ,%arg)
111 (##sys#setslot ,%cache (,%fx+ ,%index 1) ,%tmp)
112 (##sys#setislot
113 ,%cache ,n2 (,%fxmod (,%fx+ ,%index 2) ,n2))
114 ,%tmp)
115 `(,%if (,%equal? (##sys#slot ,%cache ,(* i 2)) ,%arg)
116 (##sys#slot ,%cache ,(add1 (* i 2)))
117 ,(fold (add1 i))))))))))
118
119(declare (unused %substring=?))
120(define-compiler-syntax %substring=?
121 (syntax-rules ()
122 ((_ a b start1 start2 len)
123 (##core#inline "C_u_i_substring_equal_p" a b start1 start2 len))))
124
125(define-compiler-syntax make-irregex
126 (syntax-rules ()
127 ((_ dfa dfa/search nfa flags submatches lengths names)
128 (##sys#make-structure
129 'regexp dfa dfa/search nfa flags submatches lengths names))))
130
131(define-compiler-syntax make-irregex-match
132 (syntax-rules ()
133 ((_ count names)
134 (##sys#make-structure
135 'regexp-match
136 (make-vector (* 4 (+ 2 count)) #f) ; #1: submatches
137 names ; #2: (guess)
138 #f ; #3: chunka
139 #f)))) ; #4: fail
140
141(declare (unused reverse))
142(define-compiler-syntax reverse
143 (syntax-rules ()
144 ((_ lst) (##sys#fast-reverse lst))))
145
146(declare (unused bit-shl))
147(define-compiler-syntax bit-shl
148 (syntax-rules ()
149 ((_ n i) (fxshl n i))))
150
151(declare (unused bit-shr))
152(define-compiler-syntax bit-shr
153 (syntax-rules ()
154 ((_ n i) (fxshr n i))))
155
156(declare (unused bit-not))
157(define-compiler-syntax bit-not
158 (syntax-rules ()
159 ((_ n) (fxnot n))))
160
161(declare (unused bit-ior))
162(define-compiler-syntax bit-ior
163 (syntax-rules ()
164 ((_ a b) (fxior a b))))
165
166(declare (unused bit-and))
167(define-compiler-syntax bit-and
168 (syntax-rules ()
169 ((_ a b) (fxand a b))))
170
171(define-compiler-syntax match-vector-ref
172 (syntax-rules ()
173 ((_ m i) (##sys#slot (##sys#slot m 1) i))))
174
175(define-compiler-syntax match-vector-set!
176 (syntax-rules ()
177 ((_ m i x) (##sys#setslot (##sys#slot m 1) i x))))
178
179(declare (unused irregex-match-start-chunk-set!))
180(define-compiler-syntax irregex-match-start-chunk-set!
181 (syntax-rules ()
182 ((_ m n start)
183 (vector-set! (##sys#slot m 1) (* n 4) start))))
184
185(declare (unused irregex-match-start-index-set!))
186(define-compiler-syntax irregex-match-start-index-set!
187 (syntax-rules ()
188 ((_ m n start)
189 (vector-set! (##sys#slot m 1) (+ 1 (* n 4)) start))))
190
191(declare (unused irregex-match-end-chunk-set!))
192(define-compiler-syntax irregex-match-end-chunk-set!
193 (syntax-rules ()
194 ((_ m n end)
195 (vector-set! (##sys#slot m 1) (+ 2 (* n 4)) end))))
196
197(declare (unused irregex-match-end-index-set!))
198(define-compiler-syntax irregex-match-end-index-set!
199 (syntax-rules ()
200 ((_ m n end)
201 (vector-set! (##sys#slot m 1) (+ 3 (* n 4)) end))))
202
203(declare (unused irregex-match-chunk&index-from-tag-set!))
204(define-compiler-syntax irregex-match-chunk&index-from-tag-set!
205 (syntax-rules ()
206 ((_ m t chunk index)
207 (begin
208 (vector-set! (##sys#slot m 1) (+ 4 (* t 2)) chunk)
209 (vector-set! (##sys#slot m 1) (+ 5 (* t 2)) index)))))
210
211(include "irregex-core.scm")
212(include "irregex-utils.scm")
213
214(set! *allow-utf8-mode?* #f)
215
216(define glob->sre
217 (let ((list->string list->string)
218 (string->list string->list))
219 (lambda (s)
220 (##sys#check-string s 'glob->sre)
221 (cons
222 ':
223 (let loop ((cs (string->list s)) (dir #t))
224 (if (null? cs)
225 '()
226 (let ((c (car cs))
227 (rest (cdr cs)) )
228 (cond ((char=? c #\*)
229 (if dir
230 `((or (: (~ ("./\\"))
231 (* (~ ("/\\"))))
232 (* (~ ("./\\"))))
233 ,@(loop rest #f))
234 `((* (~ ("/\\"))) ,@(loop rest #f))))
235 ((char=? c #\?) (cons 'any (loop rest #f)))
236 ((char=? c #\[)
237 (let loop2 ((rest rest) (s '()))
238 (cond ((not (pair? rest))
239 (error 'glob->sre
240 "unexpected end of character class" s))
241 ((char=? #\] (car rest))
242 `(,(if (> (length s) 1)
243 `(or ,@s)
244 (car s))
245 ,@(loop (cdr rest) #f)))
246 ((and (pair? (cdr rest))
247 (pair? (cddr rest))
248 (char=? #\- (cadr rest)) )
249 (loop2 (cdddr rest)
250 (cons `(/ ,(car rest) ,(caddr rest)) s)))
251 ((and (pair? (cdr rest))
252 (char=? #\- (car rest)))
253 (loop2 (cddr rest)
254 (cons `(~ ,(cadr rest)) s)))
255 (else
256 (loop2 (cdr rest) (cons (car rest) s))))))
257 (else (cons c (loop rest (memq c '(#\\ #\/)))))))))))))
258
259)