~ chicken-core (chicken-5) /irregex.scm
Trap1;;;; irregex.scm - container for irregex-core.scm2;3; Copyright (c) 2010-2022, The CHICKEN Team4; All rights reserved.5;6; Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following7; conditions are met:8;9; Redistributions of source code must retain the above copyright notice, this list of conditions and the following10; disclaimer.11; Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following12; disclaimer in the documentation and/or other materials provided with the distribution.13; Neither the name of the author nor the names of its contributors may be used to endorse or promote14; products derived from this software without specific prior written permission.15;16; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS17; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY18; AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR19; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR20; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR21; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY22; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR23; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE24; POSSIBILITY OF SUCH DAMAGE.25262728(declare29 (unit irregex)30 (no-procedure-checks)31 (fixnum))3233(module chicken.irregex34 (;; Constructors, accessors and predicates35 irregex irregex? string->sre maybe-string->sre sre->irregex36 irregex-names irregex-num-submatches string->irregex3738 ;; Chunking constructor39 make-irregex-chunker4041 ;; Main API42 irregex-extract irregex-fold irregex-match irregex-match?43 irregex-search irregex-split irregex-replace irregex-replace/all4445 ;; Chunked main API46 irregex-fold/chunked irregex-match/chunked irregex-search/chunked4748 ;; Match extraction API49 irregex-match-data? irregex-match-names50 irregex-match-start-index irregex-match-end-index51 irregex-match-num-submatches irregex-match-substring52 irregex-match-valid-index?5354 ;; Chunked match API55 irregex-match-start-chunk irregex-match-end-chunk56 irregex-match-subchunk5758 ;; Utilities59 glob->sre sre->string irregex-opt irregex-quote)606162(import scheme chicken.base chicken.fixnum chicken.syntax chicken.type)6364(import-for-syntax chicken.fixnum)6566(include "common-declarations.scm")6768;; These should probably be taken out of irregex upstream69(declare (unused filter integer-log cset-size remove))7071;; Due to usual-integrations, find is the one from library.scm,72;; so find-tail is unused (it's only used in the "find" definition)73(declare (unused find-tail))7475;; This is to silence an "always true" warning that we can't fix76;; because we don't want to needlessly change irregex-core.77(declare (type (*allow-utf8-mode?* boolean)))7879(define-syntax build-cache80 (er-macro-transformer81 (lambda (x r c)82 ;; (build-cache N ARG FAIL)83 (let* ((n (cadr x))84 (n2 (* n 2))85 (arg (caddr x))86 (fail (cadddr x))87 (%cache (r 'cache))88 (%index (r 'index))89 (%arg (r 'arg))90 (%let (r 'let))91 (%let* (r 'let*))92 (%if (r 'if))93 (%fx+ (r 'fx+))94 (%fxmod (r 'fxmod))95 (%equal? (r 'equal?))96 (%quote (r 'quote))97 (%tmp (r 'tmp))98 (%begin (r 'begin))99 (cache (make-vector (add1 n2) #f)))100 (##sys#setslot cache n2 0) ; last slot: current index101 `(,%let* ((,%cache (,%quote ,cache)) ; we mutate a literal vector102 (,%arg ,arg))103 ,(let fold ((i 0))104 (if (fx>= i n)105 ;; this should be thread-safe: a context-switch can only106 ;; happen before this code and in the call to FAIL.107 `(,%let ((,%tmp ,fail)108 (,%index (##sys#slot ,%cache ,n2)))109 (##sys#setslot ,%cache ,%index ,%arg)110 (##sys#setslot ,%cache (,%fx+ ,%index 1) ,%tmp)111 (##sys#setislot112 ,%cache ,n2 (,%fxmod (,%fx+ ,%index 2) ,n2))113 ,%tmp)114 `(,%if (,%equal? (##sys#slot ,%cache ,(* i 2)) ,%arg)115 (##sys#slot ,%cache ,(add1 (* i 2)))116 ,(fold (add1 i))))))))))117118(declare (unused %%string-copy!))119(define-compiler-syntax %%string-copy!120 (syntax-rules ()121 ((_ to tstart from fstart fend)122 (let ((x to)123 (y tstart)124 (z from)125 (u fstart)126 (v fend))127 (##core#inline "C_substring_copy" z x u v y)))))128129(declare (unused %substring=?))130(define-compiler-syntax %substring=?131 (syntax-rules ()132 ((_ a b start1 start2 len)133 (##core#inline "C_substring_compare" a b start1 start2 len))))134135(define-compiler-syntax make-irregex136 (syntax-rules ()137 ((_ dfa dfa/search nfa flags submatches lengths names)138 (##sys#make-structure139 'regexp dfa dfa/search nfa flags submatches lengths names))))140141(define-compiler-syntax make-irregex-match142 (syntax-rules ()143 ((_ count names)144 (##sys#make-structure145 'regexp-match146 (make-vector (* 4 (+ 2 count)) #f) ; #1: submatches147 names ; #2: (guess)148 #f ; #3: chunka149 #f)))) ; #4: fail150151(declare (unused reverse))152(define-compiler-syntax reverse153 (syntax-rules ()154 ((_ lst) (##sys#fast-reverse lst))))155156(declare (unused bit-shl))157(define-compiler-syntax bit-shl158 (syntax-rules ()159 ((_ n i) (fxshl n i))))160161(declare (unused bit-shr))162(define-compiler-syntax bit-shr163 (syntax-rules ()164 ((_ n i) (fxshr n i))))165166(declare (unused bit-not))167(define-compiler-syntax bit-not168 (syntax-rules ()169 ((_ n) (fxnot n))))170171(declare (unused bit-ior))172(define-compiler-syntax bit-ior173 (syntax-rules ()174 ((_ a b) (fxior a b))))175176(declare (unused bit-and))177(define-compiler-syntax bit-and178 (syntax-rules ()179 ((_ a b) (fxand a b))))180181(define-compiler-syntax match-vector-ref182 (syntax-rules ()183 ((_ m i) (##sys#slot (##sys#slot m 1) i))))184185(define-compiler-syntax match-vector-set!186 (syntax-rules ()187 ((_ m i x) (##sys#setslot (##sys#slot m 1) i x))))188189(declare (unused irregex-match-start-chunk-set!))190(define-compiler-syntax irregex-match-start-chunk-set!191 (syntax-rules ()192 ((_ m n start)193 (vector-set! (##sys#slot m 1) (* n 4) start))))194195(declare (unused irregex-match-start-index-set!))196(define-compiler-syntax irregex-match-start-index-set!197 (syntax-rules ()198 ((_ m n start)199 (vector-set! (##sys#slot m 1) (+ 1 (* n 4)) start))))200201(declare (unused irregex-match-end-chunk-set!))202(define-compiler-syntax irregex-match-end-chunk-set!203 (syntax-rules ()204 ((_ m n end)205 (vector-set! (##sys#slot m 1) (+ 2 (* n 4)) end))))206207(declare (unused irregex-match-end-index-set!))208(define-compiler-syntax irregex-match-end-index-set!209 (syntax-rules ()210 ((_ m n end)211 (vector-set! (##sys#slot m 1) (+ 3 (* n 4)) end))))212213(declare (unused irregex-match-chunk&index-from-tag-set!))214(define-compiler-syntax irregex-match-chunk&index-from-tag-set!215 (syntax-rules ()216 ((_ m t chunk index)217 (begin218 (vector-set! (##sys#slot m 1) (+ 4 (* t 2)) chunk)219 (vector-set! (##sys#slot m 1) (+ 5 (* t 2)) index)))))220221(include "irregex-core.scm")222(include "irregex-utils.scm")223224(define glob->sre225 (let ((list->string list->string)226 (string->list string->list))227 (lambda (s)228 (##sys#check-string s 'glob->sre)229 (cons230 ':231 (let loop ((cs (string->list s)) (dir #t))232 (if (null? cs)233 '()234 (let ((c (car cs))235 (rest (cdr cs)) )236 (cond ((char=? c #\*)237 (if dir238 `((or (: (~ ("./\\"))239 (* (~ ("/\\"))))240 (* (~ ("./\\"))))241 ,@(loop rest #f))242 `((* (~ ("/\\"))) ,@(loop rest #f))))243 ((char=? c #\?) (cons 'any (loop rest #f)))244 ((char=? c #\[)245 (let loop2 ((rest rest) (s '()))246 (cond ((not (pair? rest))247 (error 'glob->sre248 "unexpected end of character class" s))249 ((char=? #\] (car rest))250 `(,(if (> (length s) 1)251 `(or ,@s)252 (car s))253 ,@(loop (cdr rest) #f)))254 ((and (pair? (cdr rest))255 (pair? (cddr rest))256 (char=? #\- (cadr rest)) )257 (loop2 (cdddr rest)258 (cons `(/ ,(car rest) ,(caddr rest)) s)))259 ((and (pair? (cdr rest))260 (char=? #\- (car rest)))261 (loop2 (cddr rest)262 (cons `(~ ,(cadr rest)) s)))263 (else264 (loop2 (cdr rest) (cons (car rest) s))))))265 (else (cons c (loop rest (memq c '(#\\ #\/)))))))))))))266267)