~ chicken-core (chicken-5) 04b31b97c5ba6fea018c6f99ba9bc529b23144bd
commit 04b31b97c5ba6fea018c6f99ba9bc529b23144bd Author: felix <felix@call-with-current-continuation.org> AuthorDate: Sat Jul 17 23:47:51 2010 +0200 Commit: felix <felix@call-with-current-continuation.org> CommitDate: Tue Jul 27 13:09:33 2010 +0200 upgraded irregex to 0.8.1 and updated types.db; build-chicken feature diff --git a/defaults.make b/defaults.make index 4d6f183f..3b7217fc 100644 --- a/defaults.make +++ b/defaults.make @@ -293,7 +293,7 @@ CSI ?= csi$(EXE) # Scheme compiler flags -CHICKEN_OPTIONS = -optimize-level 2 -include-path . -include-path $(SRCDIR) -inline -ignore-repository +CHICKEN_OPTIONS = -optimize-level 2 -include-path . -include-path $(SRCDIR) -inline -ignore-repository -feature building-chicken ifdef DEBUGBUILD CHICKEN_OPTIONS += -feature debugbuild -scrutinize -types $(SRCDIR)types.db else diff --git a/irregex-core.scm b/irregex-core.scm index a235243e..040136bd 100644 --- a/irregex-core.scm +++ b/irregex-core.scm @@ -31,6 +31,7 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; History ;; +;; 0.8.1: 2010/03/09 - backtracking irregex-match fix and other small fixes ;; 0.8.0: 2010/01/20 - optimizing DFA compilation, adding SRE escapes ;; inside PCREs, adding utility SREs ;; 0.7.5: 2009/08/31 - adding irregex-extract and irregex-split @@ -118,9 +119,10 @@ (internal "##sys#make-structure" 'regexp-match - (make-vector (+ (* 4 (+ 2 count)) 3) #f) - names - #f)) + (make-vector (+ (* 4 (+ 2 count)) 3) #f) ; #1: submatches + names ; #2: (guess) + #f ; #3: chunka + #f)) ; #4: fail (define (irregex-new-matches irx) (make-irregex-match (irregex-num-submatches irx) (irregex-names irx))) (define (irregex-reset-matches! m) @@ -137,12 +139,13 @@ (vector-copy! v v2) v2) (internal "##sys#slot" m 2) - (internal "##sys#slot" m 3)))) + (internal "##sys#slot" m 3) + (internal "##sys#slot" m 4)))) (define (irregex-match-data? obj) (internal "##sys#structure?" obj 'regexp-match)) (define (irregex-match-num-submatches m) (internal "##sys#check-structure" m 'regexp-match 'irregex-match-num-submatches) - (- (quotient (vector-length (internal "##sys#slot" m 1)) 4) 2)) + (- (fx/ (internal "##sys#size" (internal "##sys#slot" m 1)) 4) 2)) (define (irregex-match-chunker m) (internal "##sys#slot" m 3)) (define (irregex-match-names m) @@ -157,7 +160,9 @@ (define-inline (%irregex-match-end-chunk m n) (internal "##sys#slot" (internal "##sys#slot" m 1) (+ 2 (* n 4)))) (define (%irregex-match-end-index m n) - (internal "##sys#slot" (internal "##sys#slot" m 1) (+ 3 (* n 4)))))) + (internal "##sys#slot" (internal "##sys#slot" m 1) (+ 3 (* n 4)))) + (define (%irregex-match-fail m) (internal "##sys#slot" m 4)) + (define (%irregex-match-fail-set! m x) (internal "##sys#setslot" m 4 x)))) (else (begin (define (irregex-new-matches irx) @@ -178,7 +183,7 @@ (>= (vector-length obj) 11) (eq? irregex-match-tag (vector-ref obj 0)))) (define (make-irregex-match count names) - (let ((res (make-vector (+ (* 4 (+ 2 count)) 3) #f))) + (let ((res (make-vector (+ (* 4 (+ 2 count)) 4) #f))) (vector-set! res 0 irregex-match-tag) (vector-set! res 2 names) res)) @@ -193,7 +198,9 @@ (define (%irregex-match-start-chunk m n) (vector-ref m (+ 3 (* n 4)))) (define (%irregex-match-start-index m n) (vector-ref m (+ 4 (* n 4)))) (define (%irregex-match-end-chunk m n) (vector-ref m (+ 5 (* n 4)))) - (define (%irregex-match-end-index m n) (vector-ref m (+ 6 (* n 4))))))) + (define (%irregex-match-end-index m n) (vector-ref m (+ 6 (* n 4)))) + (define (%irregex-match-fail m) (vector-ref m (- (vector-length m) 1))) + (define (%irregex-match-fail-set! m x) (vector-set! m (- (vector-length m) 1) x))))) ;; public interface with error checking (define (irregex-match-start-chunk m n) @@ -256,7 +263,7 @@ (if (not (irregex-match-data? m)) (error "irregex-match-valid-index?: not match data" m)) (if (not (integer? n)) - (error "irregex-match-valid-index?: not a valid index" n)) + (error "irregex-match-valid-index?: not an integer" n)) (%irregex-match-valid-index? m n)) (define (irregex-match-substring m . opt) @@ -630,7 +637,7 @@ (define (collect) (if (= from i) res (cons (substring str from i) res))) (if (>= i end) - (error "unterminated string in embeded SRE" str) + (error "unterminated string in embedded SRE" str) (case (string-ref str i) ((#\") (k (string-cat-reverse (collect)) (+ i 1))) ((#\\) (scan (+ i 1) (+ i 2) (collect))) @@ -849,7 +856,7 @@ (cond ((>= (+ i 1) end) (error "unterminated parenthesis in regexp" str)) - ((not (memq (string-ref str (+ i 1)) '(#\? #\*))) ; normal case + ((not (memv (string-ref str (+ i 1)) '(#\? #\*))) ; normal case (lp (+ i 1) (+ i 1) (flag-join flags ~save?) '() (save))) ((>= (+ i 2) end) (error "unterminated parenthesis in regexp" str)) @@ -1574,7 +1581,7 @@ ;;;; Compilation (cond-expand - (chicken + (building-chicken (define-syntax cached (syntax-rules () ((_ arg fail) (build-cache 5 arg fail))))) @@ -2007,32 +2014,37 @@ (else #f))) (else - (let ((matcher (irregex-nfa irx)) - (str ((chunker-get-str cnk) src)) - (end ((chunker-get-end cnk) src)) - (get-next (chunker-get-next cnk)) - (init (cons src i))) - (if (flag-set? (irregex-flags irx) ~searcher?) - (matcher cnk init src str i end matches (lambda () #f)) - (let lp ((src2 src) - (str str) - (i i) - (end end)) - (cond - ((matcher cnk init src2 str i end matches (lambda () #f)) - (irregex-match-start-chunk-set! matches 0 src2) - (irregex-match-start-index-set! matches 0 i) - matches) - ((< i end) - (lp src2 str (+ i 1) end)) - (else - (let ((src2 (get-next src2))) - (if src2 - (lp src2 - ((chunker-get-str cnk) src2) - ((chunker-get-start cnk) src2) - ((chunker-get-end cnk) src2)) - #f)))))))))) + (let ((res (irregex-search/backtrack irx cnk src i matches))) + (if res (%irregex-match-fail-set! res #f)) + res)))) + +(define (irregex-search/backtrack irx cnk src i matches) + (let ((matcher (irregex-nfa irx)) + (str ((chunker-get-str cnk) src)) + (end ((chunker-get-end cnk) src)) + (get-next (chunker-get-next cnk)) + (init (cons src i))) + (if (flag-set? (irregex-flags irx) ~searcher?) + (matcher cnk init src str i end matches (lambda () #f)) + (let lp ((src2 src) + (str str) + (i i) + (end end)) + (cond + ((matcher cnk init src2 str i end matches (lambda () #f)) + (irregex-match-start-chunk-set! matches 0 src2) + (irregex-match-start-index-set! matches 0 i) + matches) + ((< i end) + (lp src2 str (+ i 1) end)) + (else + (let ((src2 (get-next src2))) + (if src2 + (lp src2 + ((chunker-get-str cnk) src2) + ((chunker-get-start cnk) src2) + ((chunker-get-end cnk) src2)) + #f)))))))) (define (irregex-match irx str . o) (if (not (string? str)) (error "irregex-match: not a string" str)) @@ -2069,12 +2081,21 @@ (str ((chunker-get-str cnk) src)) (i ((chunker-get-start cnk) src)) (end ((chunker-get-end cnk) src)) - (m (matcher cnk src src str i end matches (lambda () #f)))) - (and m - (not ((chunker-get-next cnk) (%irregex-match-end-chunk m 0))) - (= ((chunker-get-end cnk) (%irregex-match-end-chunk m 0)) - (%irregex-match-end-index m 0)) - m)))))) + (init (cons src i))) + (let lp ((m (matcher cnk init src str i end matches (lambda () #f)))) + (and m + (cond + ((and (not ((chunker-get-next cnk) + (%irregex-match-end-chunk m 0))) + (= ((chunker-get-end cnk) + (%irregex-match-end-chunk m 0)) + (%irregex-match-end-index m 0))) + (%irregex-match-fail-set! m #f) + m) + ((%irregex-match-fail m) + (lp ((%irregex-match-fail m)))) + (else + #f))))))))) (define (irregex-match? . args) (and (apply irregex-match args) #t)) @@ -2523,9 +2544,11 @@ ;; (sre-sequence (cdddar ls))))) ;; (cdr ls)) ;; n flags next)) - (($ submatch => submatch-named) ;; ignore submatches altogether + (($ submatch) (lp (cons (sre-sequence (cdar ls)) (cdr ls)) n flags next)) + ((=> submatch-named) + (lp (cons (sre-sequence (cddar ls)) (cdr ls)) n flags next)) (else (cond ((assq (caar ls) sre-named-definitions) @@ -2917,8 +2940,8 @@ (define-inline (match-vector-set! m i x) (vector-set! (internal "##sys#slot" m 1) i x)))) (else - (define match-vector-ref vector-ref) - (define match-vector-set! vector-set!))) + (define (match-vector-ref v i) (vector-ref v (+ 3 i))) + (define (match-vector-set! v i x) (vector-set! v (+ 3 i) x)))) (define (sre-match-extractor sre num-submatches) (let* ((tmp (+ num-submatches 1)) @@ -3026,9 +3049,13 @@ (lambda (cnk start i end j matches) (match-once cnk start i end j matches) #t))) - (($ submatch) + (($ submatch => submatch-named) (let ((match-one - (lp (sre-sequence (cdr sre)) (+ n 1) #t)) + (lp (sre-sequence (if (memq (car sre) '($ submatch)) + (cdr sre) + (cddr sre))) + (+ n 1) + #t)) (start-src-offset (* n 4)) (start-index-offset (+ 1 (* n 4))) (end-src-offset (+ 2 (* n 4))) @@ -3069,6 +3096,7 @@ (irregex-match-start-index-set! matches 0 (cdr init)) (irregex-match-end-chunk-set! matches 0 src) (irregex-match-end-index-set! matches 0 i) + (%irregex-match-fail-set! matches fail) matches))) ;; XXXX this should be inlined (define (rec sre) (lp sre n flags next)) diff --git a/tests/scrutiny.expected b/tests/scrutiny.expected index 9c5c82a8..cd3a5bc4 100644 --- a/tests/scrutiny.expected +++ b/tests/scrutiny.expected @@ -1,6 +1,6 @@ Warning: at toplevel: - use of deprecated toplevel identifier `current-environment' + use of deprecated library procedure `current-environment' Warning: in local procedure `c', in local procedure `b', diff --git a/types.db b/types.db index b1a5cb19..e11c436e 100644 --- a/types.db +++ b/types.db @@ -537,30 +537,58 @@ ;; irregex (irregex (procedure irregex (#!rest) *)) -(string->irregex (procedure string->irregex (string #!rest) *)) -(sre->irregex (procedure sre->irregex (#!rest) *)) -(string->sre (procedure string->sre (string #!rest) *)) -(irregex? (procedure irregex? (*) boolean)) +;irregex-apply-match +(irregex-dfa (procedure irregex-dfa (*) *)) +(irregex-dfa/extract (procedure irregex-dfa/extract (*) *)) +(irregex-dfa/search (procedure irregex-dfa/search (*) *)) +(irregex-extract (procedure irregex-extract (* string #!optional fixnum fixnum) list)) +(irregex-flags (procedure irregex-flags (*) *)) +(irregex-fold (procedure irregex-fold (* (procedure (fixnum (struct regexp-match)) *) * string #!optional (procedure (fixnum *) *) fixnum fixnum) *)) +(irregex-fold/chunked (procedure irregex-fold/chunked (* (procedure (fixnum (struct regexp-match)) *) * procedure * #!optional (procedure (fixnum *) *) fixnum fixnum) *)) +(irregex-lengths (procedure irregex-lengths (*) *)) +(irregex-match (procedure irregex-match (* string) *)) +;irregex-match? (irregex-match-data? (procedure irregex-match-data? (*) boolean)) -(irregex-new-matches (procedure irregex-new-matches (*) *)) -(irregex-reset-matches! (procedure irregex-reset-matches! (*) *)) -(irregex-match-start (procedure irregex-match-start (* #!optional *) *)) (irregex-match-end (procedure irregex-match-end (* #!optional *) *)) -(irregex-match-substring (procedure irregex-match-substring (* #!optional *) *)) -(irregex-search (procedure irregex-search (* string #!optional fixnum fixnum) *)) -(irregex-search/matches (procedure irregex-search/matches (* string fixnum fixnum *) *)) -(irregex-match (procedure irregex-match (* string) *)) +;irregex-match-end-chunk +(irregex-match-end-index (procedure irregex-match-end-index ((struct regexp-match) *) fixnum)) +(irregex-match-names (procedure irregex-match-names ((struct regexp-match)) list)) +(irregex-match-num-submatches (procedure irregex-match-num-submatches ((struct regexp-match)) fixnum)) +(irregex-match-start (procedure irregex-match-start (* #!optional *) *)) +;irregex-match-start-chunk +(irregex-match-start-index (procedure irregex-match-start-index ((struct regexp-match) *) fixnum)) (irregex-match-string (procedure irregex-match-string (*) *)) +(irregex-match-subchunk (procedure irregex-match-subchunk ((struct regexp-match) #!optional *) *)) +(irregex-match-substring (procedure irregex-match-substring (* #!optional *) *)) +(irregex-match/chunked (procedure irregex-match/chunked (* * * #!optional fixnum) *)) +(irregex-names (procedure irregex-names (*) *)) +(irregex-new-matches (procedure irregex-new-matches (*) *)) +(irregex-nfa (procedure irregex-nfa (*) *)) +(irregex-num-submatches (procedure irregex-num-submatches (*) fixnum)) +(irregex-opt (procedure irregex-opt (list) *)) +(irregex-quote (procedure irregex-quote (string) string)) (irregex-replace (procedure irregex-replace (* string #!rest) *)) (irregex-replace/all (procedure irregex-replace/all (* string #!rest) *)) -(irregex-dfa (procedure irregex-dfa (*) *)) -(irregex-dfa/search (procedure irregex-dfa/search (*) *)) -(irregex-dfa/extract (procedure irregex-dfa/extract (*) *)) -(irregex-nfa (procedure irregex-nfa (*) *)) -(irregex-flags (procedure irregex-flags (*) *)) +(irregex-reset-matches! (procedure irregex-reset-matches! (*) *)) +(irregex-search (procedure irregex-search (* string #!optional fixnum fixnum) *)) +(irregex-search/matches (procedure irregex-search/matches (* string fixnum fixnum *) *)) +(irregex-split (procedure irregex-split (* string #!optional fixnum fixnum) list)) (irregex-submatches (procedure irregex-submatches (*) *)) -(irregex-lengths (procedure irregex-lengths (*) *)) -(irregex-names (procedure irregex-names (*) *)) +(irregex? (procedure irregex? (*) boolean)) +(make-irregex-chunker + (procedure make-irregex-chunker + ((procedure (*) *) + (procedure (*) *) + #!optional + (procedure (*) *) + (procedure (*) *) + (procedure (* fixnum * fixnum) string) + (procedure (* fixnum * fixnum) *)) + *)) +(maybe-string->sre (procedure maybe-string->sre (*) *)) +(sre->irregex (procedure sre->irregex (#!rest) *)) +(string->irregex (procedure string->irregex (string #!rest) *)) +(string->sre (procedure string->sre (string #!rest) *)) ;; lolevelTrap