~ chicken-core (master) /irregex.scm
Trap1;;;; irregex.scm - container for irregex-core.scm2;3; Copyright (c) 2010-2022, The CHICKEN Team4; All rights reserved.5;6; Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following7; conditions are met:8;9; Redistributions of source code must retain the above copyright notice, this list of conditions and the following10; disclaimer.11; Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following12; disclaimer in the documentation and/or other materials provided with the distribution.13; Neither the name of the author nor the names of its contributors may be used to endorse or promote14; products derived from this software without specific prior written permission.15;16; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS17; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY18; AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR19; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR20; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR21; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY22; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR23; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE24; POSSIBILITY OF SUCH DAMAGE.25262728(declare29 (unit irregex)30 (no-procedure-checks)31 (fixnum))3233(module chicken.irregex34 (;; Constructors, accessors and predicates35 irregex irregex? string->sre maybe-string->sre sre->irregex36 irregex-names irregex-num-submatches string->irregex3738 ;; Chunking constructor39 make-irregex-chunker4041 ;; Main API42 irregex-extract irregex-fold irregex-match irregex-match?43 irregex-search irregex-split irregex-replace irregex-replace/all4445 ;; Chunked main API46 irregex-fold/chunked irregex-match/chunked irregex-search/chunked4748 ;; Match extraction API49 irregex-match-data? irregex-match-names50 irregex-match-start-index irregex-match-end-index51 irregex-match-num-submatches irregex-match-substring52 irregex-match-valid-index?5354 ;; Chunked match API55 irregex-match-start-chunk irregex-match-end-chunk56 irregex-match-subchunk5758 ;; Utilities59 glob->sre sre->string irregex-opt irregex-quote)606162(import scheme chicken.base chicken.fixnum chicken.syntax chicken.type)63(import (only (scheme base) open-output-string get-output-string))6465(import-for-syntax chicken.fixnum)6667(include "common-declarations.scm")6869;; These should probably be taken out of irregex upstream70(declare (unused filter integer-log cset-size remove))7172;; Due to usual-integrations, find is the one from library.scm,73;; so find-tail is unused (it's only used in the "find" definition)74(declare (unused find-tail))7576;; This is to silence an "always true" warning that we can't fix77;; because we don't want to needlessly change irregex-core.78(declare (type (*allow-utf8-mode?* boolean)))7980(define-syntax build-cache81 (er-macro-transformer82 (lambda (x r c)83 ;; (build-cache N ARG FAIL)84 (let* ((n (cadr x))85 (n2 (* n 2))86 (arg (caddr x))87 (fail (cadddr x))88 (%cache (r 'cache))89 (%index (r 'index))90 (%arg (r 'arg))91 (%let (r 'let))92 (%let* (r 'let*))93 (%if (r 'if))94 (%fx+ (r 'fx+))95 (%fxmod (r 'fxmod))96 (%equal? (r 'equal?))97 (%quote (r 'quote))98 (%tmp (r 'tmp))99 (%begin (r 'begin))100 (cache (make-vector (add1 n2) #f)))101 (##sys#setslot cache n2 0) ; last slot: current index102 `(,%let* ((,%cache (,%quote ,cache)) ; we mutate a literal vector103 (,%arg ,arg))104 ,(let fold ((i 0))105 (if (fx>= i n)106 ;; this should be thread-safe: a context-switch can only107 ;; happen before this code and in the call to FAIL.108 `(,%let ((,%tmp ,fail)109 (,%index (##sys#slot ,%cache ,n2)))110 (##sys#setslot ,%cache ,%index ,%arg)111 (##sys#setslot ,%cache (,%fx+ ,%index 1) ,%tmp)112 (##sys#setislot113 ,%cache ,n2 (,%fxmod (,%fx+ ,%index 2) ,n2))114 ,%tmp)115 `(,%if (,%equal? (##sys#slot ,%cache ,(* i 2)) ,%arg)116 (##sys#slot ,%cache ,(add1 (* i 2)))117 ,(fold (add1 i))))))))))118119(declare (unused %substring=?))120(define-compiler-syntax %substring=?121 (syntax-rules ()122 ((_ a b start1 start2 len)123 (##core#inline "C_u_i_substring_equal_p" a b start1 start2 len))))124125(define-compiler-syntax make-irregex126 (syntax-rules ()127 ((_ dfa dfa/search nfa flags submatches lengths names)128 (##sys#make-structure129 'regexp dfa dfa/search nfa flags submatches lengths names))))130131(define-compiler-syntax make-irregex-match132 (syntax-rules ()133 ((_ count names)134 (##sys#make-structure135 'regexp-match136 (make-vector (* 4 (+ 2 count)) #f) ; #1: submatches137 names ; #2: (guess)138 #f ; #3: chunka139 #f)))) ; #4: fail140141(declare (unused reverse))142(define-compiler-syntax reverse143 (syntax-rules ()144 ((_ lst) (##sys#fast-reverse lst))))145146(declare (unused bit-shl))147(define-compiler-syntax bit-shl148 (syntax-rules ()149 ((_ n i) (fxshl n i))))150151(declare (unused bit-shr))152(define-compiler-syntax bit-shr153 (syntax-rules ()154 ((_ n i) (fxshr n i))))155156(declare (unused bit-not))157(define-compiler-syntax bit-not158 (syntax-rules ()159 ((_ n) (fxnot n))))160161(declare (unused bit-ior))162(define-compiler-syntax bit-ior163 (syntax-rules ()164 ((_ a b) (fxior a b))))165166(declare (unused bit-and))167(define-compiler-syntax bit-and168 (syntax-rules ()169 ((_ a b) (fxand a b))))170171(define-compiler-syntax match-vector-ref172 (syntax-rules ()173 ((_ m i) (##sys#slot (##sys#slot m 1) i))))174175(define-compiler-syntax match-vector-set!176 (syntax-rules ()177 ((_ m i x) (##sys#setslot (##sys#slot m 1) i x))))178179(declare (unused irregex-match-start-chunk-set!))180(define-compiler-syntax irregex-match-start-chunk-set!181 (syntax-rules ()182 ((_ m n start)183 (vector-set! (##sys#slot m 1) (* n 4) start))))184185(declare (unused irregex-match-start-index-set!))186(define-compiler-syntax irregex-match-start-index-set!187 (syntax-rules ()188 ((_ m n start)189 (vector-set! (##sys#slot m 1) (+ 1 (* n 4)) start))))190191(declare (unused irregex-match-end-chunk-set!))192(define-compiler-syntax irregex-match-end-chunk-set!193 (syntax-rules ()194 ((_ m n end)195 (vector-set! (##sys#slot m 1) (+ 2 (* n 4)) end))))196197(declare (unused irregex-match-end-index-set!))198(define-compiler-syntax irregex-match-end-index-set!199 (syntax-rules ()200 ((_ m n end)201 (vector-set! (##sys#slot m 1) (+ 3 (* n 4)) end))))202203(declare (unused irregex-match-chunk&index-from-tag-set!))204(define-compiler-syntax irregex-match-chunk&index-from-tag-set!205 (syntax-rules ()206 ((_ m t chunk index)207 (begin208 (vector-set! (##sys#slot m 1) (+ 4 (* t 2)) chunk)209 (vector-set! (##sys#slot m 1) (+ 5 (* t 2)) index)))))210211(include "irregex-core.scm")212(include "irregex-utils.scm")213214(set! *allow-utf8-mode?* #f)215216(define glob->sre217 (let ((list->string list->string)218 (string->list string->list))219 (lambda (s)220 (##sys#check-string s 'glob->sre)221 (cons222 ':223 (let loop ((cs (string->list s)) (dir #t))224 (if (null? cs)225 '()226 (let ((c (car cs))227 (rest (cdr cs)) )228 (cond ((char=? c #\*)229 (if dir230 `((or (: (~ ("./\\"))231 (* (~ ("/\\"))))232 (* (~ ("./\\"))))233 ,@(loop rest #f))234 `((* (~ ("/\\"))) ,@(loop rest #f))))235 ((char=? c #\?) (cons 'any (loop rest #f)))236 ((char=? c #\[)237 (let loop2 ((rest rest) (s '()))238 (cond ((not (pair? rest))239 (error 'glob->sre240 "unexpected end of character class" s))241 ((char=? #\] (car rest))242 `(,(if (> (length s) 1)243 `(or ,@s)244 (car s))245 ,@(loop (cdr rest) #f)))246 ((and (pair? (cdr rest))247 (pair? (cddr rest))248 (char=? #\- (cadr rest)) )249 (loop2 (cdddr rest)250 (cons `(/ ,(car rest) ,(caddr rest)) s)))251 ((and (pair? (cdr rest))252 (char=? #\- (car rest)))253 (loop2 (cddr rest)254 (cons `(~ ,(cadr rest)) s)))255 (else256 (loop2 (cdr rest) (cons (car rest) s))))))257 (else (cons c (loop rest (memq c '(#\\ #\/)))))))))))))258259)