~ chicken-core (chicken-5) 1acdbfa1ba163929eab4d032c0e07bbb6b543f5d
commit 1acdbfa1ba163929eab4d032c0e07bbb6b543f5d Author: Peter Bex <Peter.Bex@xs4all.nl> AuthorDate: Sat Oct 23 18:59:55 2010 +0200 Commit: Peter Bex <Peter.Bex@xs4all.nl> CommitDate: Sat Oct 23 18:59:55 2010 +0200 Apply upstream changeset 2e55ccfbbab7 (Change all submatch accessors to return #f in case of a defined, but nonmatching subchunk. They still throw errors on undefined subchunks. The representation of match objects was also changed to have only the absolutely required vector size (it was too big by one slot, which caused problems when trying to determine the number of defined numbered submatches)). Also fix the Chicken internal submatch vector size of match objects (it was too big because it allocated room for stuff that was pulled out of the submatch vector) diff --git a/irregex-core.scm b/irregex-core.scm index 08b94f2a..ab634f2b 100644 --- a/irregex-core.scm +++ b/irregex-core.scm @@ -159,8 +159,11 @@ (display " submatches)>" out))) (define-inline (irregex-match-valid-numeric-index? m n) (let ((v (internal "##sys#slot" m 1))) - (and (>= n 0) (< (* n 4) (internal "##sys#size" v)) - (internal "##sys#slot" v (+ 1 (* n 4)))))))) + (and (>= n 0) (< (* n 4) (- (internal "##sys#size" v) 4))))) + (define-inline (irregex-match-matched-numeric-index? m n) + (let ((v (internal "##sys#slot" m 1))) + (and (internal "##sys#slot" v (+ 1 (* n 4))) + #t))))) (else (begin (define irregex-tag '*irregex-tag*) @@ -198,7 +201,7 @@ (>= (vector-length obj) 11) (eq? irregex-match-tag (vector-ref obj 0)))) (define (make-irregex-match count names) - (let ((res (make-vector (+ (* 4 (+ 2 count)) 4) #f))) + (let ((res (make-vector (+ (* 4 (+ 2 count)) 3) #f))) (vector-set! res 0 irregex-match-tag) (vector-set! res 2 names) res)) @@ -217,8 +220,9 @@ (define (%irregex-match-fail m) (vector-ref m (- (vector-length m) 1))) (define (%irregex-match-fail-set! m x) (vector-set! m (- (vector-length m) 1) x)) (define (irregex-match-valid-numeric-index? m n) - (and (>= n 0) (< (+ 3 (* n 4)) (vector-length m)) - (vector-ref m (+ 4 (* n 4))) + (and (>= n 0) (< (+ 3 (* n 4)) (- (vector-length m) 4)))) + (define (irregex-match-matched-numeric-index? m n) + (and (vector-ref m (+ 4 (* n 4))) #t))))) (define (irregex-match-valid-named-index? m n) @@ -227,17 +231,17 @@ ;; public interface with error checking (define (irregex-match-start-chunk m . opt) - (let ((n (irregex-match-numeric-index 'irregex-match-start-chunk m opt #t))) - (%irregex-match-start-chunk m n))) + (let ((n (irregex-match-numeric-index 'irregex-match-start-chunk m opt))) + (and n (%irregex-match-start-chunk m n)))) (define (irregex-match-start-index m . opt) - (let ((n (irregex-match-numeric-index 'irregex-match-start-index m opt #t))) - (%irregex-match-start-index m n))) + (let ((n (irregex-match-numeric-index 'irregex-match-start-index m opt))) + (and n (%irregex-match-start-index m n)))) (define (irregex-match-end-chunk m . opt) - (let ((n (irregex-match-numeric-index 'irregex-match-end-chunk m opt #t))) - (%irregex-match-end-chunk m n))) + (let ((n (irregex-match-numeric-index 'irregex-match-end-chunk m opt))) + (and n (%irregex-match-end-chunk m n)))) (define (irregex-match-end-index m . opt) - (let ((n (irregex-match-numeric-index 'irregex-match-end-index m opt #t))) - (%irregex-match-end-index m n))) + (let ((n (irregex-match-numeric-index 'irregex-match-end-index m opt))) + (and n (%irregex-match-end-index m n)))) (define (irregex-match-start-chunk-set! m n start) (vector-set! m (+ 3 (* n 4)) start)) @@ -251,10 +255,9 @@ ;; Helper procedure to convert any type of index from a rest args list ;; to a numeric index. Named submatches are converted to their corresponding ;; numeric index, and numeric submatches are checked for validity. -;; If strict? is true, an error is raised for invalid numeric indices. -;; #f is returned if strict? is false, but unknown named submatches always -;; cause an error, regardless of strict?ness -(define (irregex-match-numeric-index location m opt strict?) +;; An error is raised for invalid numeric or named indices, #f is returned +;; for defined but nonmatching indices. +(define (irregex-match-numeric-index location m opt) (cond ((not (irregex-match-data? m)) (%irregex-error location "not match data" m)) @@ -265,9 +268,9 @@ (let ((n (car opt))) (if (number? n) (if (and (integer? n) (exact? n)) - (or (and (irregex-match-valid-numeric-index? m n) n) - (and strict? - (%irregex-error location "not a valid index" m n))) + (if (irregex-match-valid-numeric-index? m n) + (and (irregex-match-matched-numeric-index? m n) n) + (%irregex-error location "not a valid index" m n)) (%irregex-error location "not an exact integer" n)) (let lp ((ls (irregex-match-names m)) (unknown? #t)) @@ -291,7 +294,7 @@ (irregex-match-valid-named-index? m n))) (define (irregex-match-substring m . opt) - (let* ((n (irregex-match-numeric-index 'irregex-match-substring m opt #f)) + (let* ((n (irregex-match-numeric-index 'irregex-match-substring m opt)) (cnk (irregex-match-chunker m))) (and n ((chunker-get-substring cnk) @@ -301,7 +304,7 @@ (%irregex-match-end-index m n))))) (define (irregex-match-subchunk m . opt) - (let* ((n (irregex-match-numeric-index 'irregex-match-subchunk m opt #f)) + (let* ((n (irregex-match-numeric-index 'irregex-match-subchunk m opt)) (cnk (irregex-match-chunker m)) (get-subchunk (chunker-get-subchunk cnk))) (if (not get-subchunk) diff --git a/irregex.scm b/irregex.scm index fb4cf21e..989c9e94 100644 --- a/irregex.scm +++ b/irregex.scm @@ -150,10 +150,10 @@ ((_ count names) (##sys#make-structure 'regexp-match - (make-vector (+ (* 4 (+ 2 count)) 3) #f) ; #1: submatches - names ; #2: (guess) - #f ; #3: chunka - #f)))) ; #4: fail + (make-vector (* 4 (+ 2 count)) #f) ; #1: submatches + names ; #2: (guess) + #f ; #3: chunka + #f)))) ; #4: fail (define-compiler-syntax bit-shl (syntax-rules () diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm index d3c41b99..8834ab98 100644 --- a/tests/test-irregex.scm +++ b/tests/test-irregex.scm @@ -8,7 +8,8 @@ (define (subst-matches matches subst) (define (submatch n) (if (irregex-match-data? matches) - (irregex-match-substring matches n) + (and (irregex-match-valid-index? matches n) + (irregex-match-substring matches n)) (list-ref matches n))) (and matches @@ -271,40 +272,66 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (test-group "API" - (test-assert (irregex? (irregex "a.*b"))) - (test-assert (irregex? (irregex '(: "a" (* any) "b")))) - (test-assert (not (irregex? (vector '*irregex-tag* #f #f #f #f #f #f #f)))) - (test-assert (not (irregex? (vector #f #f #f #f #f #f #f #f #f)))) - (test-assert (irregex-match-data? (irregex-search "a.*b" "axxxb"))) - (test-assert (irregex-match-data? (irregex-match "a.*b" "axxxb"))) - (test-assert (not (irregex-match-data? (vector '*irregex-match-tag* #f #f #f #f #f #f #f #f #f)))) - (test-assert (not (irregex-match-data? (vector #f #f #f #f #f #f #f #f #f #f #f)))) - (test-equal 0 (irregex-num-submatches (irregex "a.*b"))) - (test-equal 1 (irregex-num-submatches (irregex "a(.*)b"))) - (test-equal 2 (irregex-num-submatches (irregex "(a(.*))b"))) - (test-equal 2 (irregex-num-submatches (irregex "a(.*)(b)"))) - (test-equal 10 (irregex-num-submatches (irregex "((((((((((a))))))))))"))) - (test-equal 0 (irregex-match-num-submatches (irregex-search "a.*b" "axxxb"))) - (test-equal 1 (irregex-match-num-submatches (irregex-search "a(.*)b" "axxxb"))) - (test-equal 2 (irregex-match-num-submatches (irregex-search "(a(.*))b" "axxxb"))) - (test-equal 2 (irregex-match-num-submatches (irregex-search "a(.*)(b)" "axxxb"))) - (test-equal 10 (irregex-match-num-submatches (irregex-search "((((((((((a))))))))))" "a"))) - (test-assert - (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") 0)) - (test-assert - (not (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") 1))) - (test-assert - (not (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") -1))) - (test-assert - (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 0)) - (test-assert - (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 1)) - (test-assert - (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 2)) - (test-assert - (not (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 3))) - (test-equal 1 (irregex-match-start-index (irregex-search "a(.*)(b)" "axxxb") 1)) - (test-equal 4 (irregex-match-end-index (irregex-search "a(.*)(b)" "axxxb") 1)) + (test-group "predicates" + (test-assert (irregex? (irregex "a.*b"))) + (test-assert (irregex? (irregex '(: "a" (* any) "b")))) + (test-assert (not (irregex? (vector '*irregex-tag* #f #f #f #f #f #f #f)))) + (test-assert (not (irregex? (vector #f #f #f #f #f #f #f #f #f)))) + (test-assert (irregex-match-data? (irregex-search "a.*b" "axxxb"))) + (test-assert (irregex-match-data? (irregex-match "a.*b" "axxxb"))) + (test-assert (not (irregex-match-data? (vector '*irregex-match-tag* #f #f #f #f #f #f #f #f #f)))) + (test-assert (not (irregex-match-data? (vector #f #f #f #f #f #f #f #f #f #f #f))))) + (test-group "valid index" + (test-assert + (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") 0)) + (test-assert + (not (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") 1))) + (test-assert + (not (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") -1))) + (test-assert + (irregex-match-valid-index? (irregex-search "a(.*)|(b)" "axxx") 0)) + (test-assert + (irregex-match-valid-index? (irregex-search "a(.*)|(b)" "axxx") 1)) + (test-assert + (irregex-match-valid-index? (irregex-search "a(.*)|(b)" "axxx") 2)) + (test-assert + (irregex-match-valid-index? (irregex-search "a(.*)|(b)" "b") 2)) + (test-assert + (not (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 3))) + (test-assert + (not (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") -1)))) + (test-group "number of submatches" + (test-equal 0 (irregex-num-submatches (irregex "a.*b"))) + (test-equal 1 (irregex-num-submatches (irregex "a(.*)b"))) + (test-equal 2 (irregex-num-submatches (irregex "(a(.*))b"))) + (test-equal 2 (irregex-num-submatches (irregex "a(.*)(b)"))) + (test-equal 10 (irregex-num-submatches (irregex "((((((((((a))))))))))"))) + (test-equal 0 (irregex-match-num-submatches (irregex-search "a.*b" "axxxb"))) + (test-equal 1 (irregex-match-num-submatches (irregex-search "a(.*)b" "axxxb"))) + (test-equal 2 (irregex-match-num-submatches (irregex-search "(a(.*))b" "axxxb"))) + (test-equal 2 (irregex-match-num-submatches (irregex-search "a(.*)(b)" "axxxb"))) + (test-equal 10 (irregex-match-num-submatches (irregex-search "((((((((((a))))))))))" "a")))) + (test-group "match substring" + (test-equal "axxxb" (irregex-match-substring (irregex-search "a.*b" "axxxb") 0)) + (test-error (irregex-match-substring (irregex-search "a.*b" "axxxb") 1)) + (test-equal "xxx" (irregex-match-substring (irregex-search "a(.*)|b" "axxx") 1)) + (test-equal #f (irregex-match-substring (irregex-search "a(.*)|b" "b") 1)) + (test-error (irregex-match-substring (irregex-search "a(.*)|b" "axxx") 2)) + (test-error (irregex-match-substring (irregex-search "a(.*)|b" "b") 2))) + (test-group "match start-index" + (test-equal 0 (irregex-match-start-index (irregex-search "a.*b" "axxxb") 0)) + (test-error (irregex-match-start-index (irregex-search "a.*b" "axxxb") 1)) + (test-equal 1 (irregex-match-start-index (irregex-search "a(.*)|b" "axxx") 1)) + (test-equal #f (irregex-match-start-index (irregex-search "a(.*)|b" "b") 1)) + (test-error (irregex-match-start-index (irregex-search "a(.*)|b" "axxx") 2)) + (test-error (irregex-match-start-index (irregex-search "a(.*)|b" "b") 2))) + (test-group "match end-index" + (test-equal 5 (irregex-match-end-index (irregex-search "a.*b" "axxxb") 0)) + (test-error (irregex-match-end-index (irregex-search "a.*b" "axxxb") 1)) + (test-equal 4 (irregex-match-end-index (irregex-search "a(.*)|b" "axxx") 1)) + (test-equal #f (irregex-match-end-index (irregex-search "a(.*)|b" "b") 1)) + (test-error (irregex-match-end-index (irregex-search "a(.*)|b" "axxx") 2)) + (test-error (irregex-match-end-index (irregex-search "a(.*)|b" "b") 2))) ) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Trap