~ chicken-core (chicken-5) 04b31b97c5ba6fea018c6f99ba9bc529b23144bd


commit 04b31b97c5ba6fea018c6f99ba9bc529b23144bd
Author:     felix <felix@call-with-current-continuation.org>
AuthorDate: Sat Jul 17 23:47:51 2010 +0200
Commit:     felix <felix@call-with-current-continuation.org>
CommitDate: Tue Jul 27 13:09:33 2010 +0200

    upgraded irregex to 0.8.1 and updated types.db; build-chicken feature

diff --git a/defaults.make b/defaults.make
index 4d6f183f..3b7217fc 100644
--- a/defaults.make
+++ b/defaults.make
@@ -293,7 +293,7 @@ CSI ?= csi$(EXE)
 
 # Scheme compiler flags
 
-CHICKEN_OPTIONS = -optimize-level 2 -include-path . -include-path $(SRCDIR) -inline -ignore-repository
+CHICKEN_OPTIONS = -optimize-level 2 -include-path . -include-path $(SRCDIR) -inline -ignore-repository -feature building-chicken
 ifdef DEBUGBUILD
 CHICKEN_OPTIONS += -feature debugbuild -scrutinize -types $(SRCDIR)types.db
 else
diff --git a/irregex-core.scm b/irregex-core.scm
index a235243e..040136bd 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -31,6 +31,7 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;; History
 ;;
+;; 0.8.1: 2010/03/09 - backtracking irregex-match fix and other small fixes
 ;; 0.8.0: 2010/01/20 - optimizing DFA compilation, adding SRE escapes
 ;;                     inside PCREs, adding utility SREs
 ;; 0.7.5: 2009/08/31 - adding irregex-extract and irregex-split
@@ -118,9 +119,10 @@
        (internal
 	"##sys#make-structure"
 	'regexp-match
-	(make-vector (+ (* 4 (+ 2 count)) 3) #f)
-	names
-	#f))
+	(make-vector (+ (* 4 (+ 2 count)) 3) #f) ; #1: submatches
+	names					 ; #2: (guess)
+	#f					 ; #3: chunka
+	#f))					 ; #4: fail
      (define (irregex-new-matches irx)
        (make-irregex-match (irregex-num-submatches irx) (irregex-names irx)))
      (define (irregex-reset-matches! m)
@@ -137,12 +139,13 @@
 	       (vector-copy! v v2)
 	       v2)
 	     (internal "##sys#slot" m 2)
-	     (internal "##sys#slot" m 3))))
+	     (internal "##sys#slot" m 3)
+	     (internal "##sys#slot" m 4))))
      (define (irregex-match-data? obj)
        (internal "##sys#structure?" obj 'regexp-match))
      (define (irregex-match-num-submatches m)
        (internal "##sys#check-structure" m 'regexp-match 'irregex-match-num-submatches)
-       (- (quotient (vector-length (internal "##sys#slot" m 1)) 4) 2))
+       (- (fx/ (internal "##sys#size" (internal "##sys#slot" m 1)) 4) 2))
      (define (irregex-match-chunker m)
        (internal "##sys#slot" m 3))
      (define (irregex-match-names m)
@@ -157,7 +160,9 @@
      (define-inline (%irregex-match-end-chunk m n)
        (internal "##sys#slot" (internal "##sys#slot" m 1) (+ 2 (* n 4))))
      (define (%irregex-match-end-index m n)
-       (internal "##sys#slot" (internal "##sys#slot" m 1) (+ 3 (* n 4))))))
+       (internal "##sys#slot" (internal "##sys#slot" m 1) (+ 3 (* n 4))))
+     (define (%irregex-match-fail m) (internal "##sys#slot" m 4))
+     (define (%irregex-match-fail-set! m x) (internal "##sys#setslot" m 4 x))))
   (else
    (begin
      (define (irregex-new-matches irx)
@@ -178,7 +183,7 @@
 	    (>= (vector-length obj) 11)
 	    (eq? irregex-match-tag (vector-ref obj 0))))
      (define (make-irregex-match count names)
-       (let ((res (make-vector (+ (* 4 (+ 2 count)) 3) #f)))
+       (let ((res (make-vector (+ (* 4 (+ 2 count)) 4) #f)))
 	 (vector-set! res 0 irregex-match-tag)
 	 (vector-set! res 2 names)
 	 res))
@@ -193,7 +198,9 @@
      (define (%irregex-match-start-chunk m n) (vector-ref m (+ 3 (* n 4))))
      (define (%irregex-match-start-index m n) (vector-ref m (+ 4 (* n 4))))
      (define (%irregex-match-end-chunk m n)   (vector-ref m (+ 5 (* n 4))))
-     (define (%irregex-match-end-index m n)   (vector-ref m (+ 6 (* n 4)))))))
+     (define (%irregex-match-end-index m n)   (vector-ref m (+ 6 (* n 4))))
+     (define (%irregex-match-fail m) (vector-ref m (- (vector-length m) 1)))
+     (define (%irregex-match-fail-set! m x) (vector-set! m (- (vector-length m) 1) x)))))
 
 ;; public interface with error checking
 (define (irregex-match-start-chunk m n)
@@ -256,7 +263,7 @@
   (if (not (irregex-match-data? m))
       (error "irregex-match-valid-index?: not match data" m))
   (if (not (integer? n))
-      (error "irregex-match-valid-index?: not a valid index" n))
+      (error "irregex-match-valid-index?: not an integer" n))
   (%irregex-match-valid-index? m n))
 
 (define (irregex-match-substring m . opt)
@@ -630,7 +637,7 @@
            (define (collect)
              (if (= from i) res (cons (substring str from i) res)))
            (if (>= i end)
-               (error "unterminated string in embeded SRE" str)
+               (error "unterminated string in embedded SRE" str)
                (case (string-ref str i)
                  ((#\") (k (string-cat-reverse (collect)) (+ i 1)))
                  ((#\\) (scan (+ i 1) (+ i 2) (collect)))
@@ -849,7 +856,7 @@
                (cond
                 ((>= (+ i 1) end)
                  (error "unterminated parenthesis in regexp" str))
-                ((not (memq (string-ref str (+ i 1)) '(#\? #\*))) ; normal case
+                ((not (memv (string-ref str (+ i 1)) '(#\? #\*))) ; normal case
                  (lp (+ i 1) (+ i 1) (flag-join flags ~save?) '() (save)))
                 ((>= (+ i 2) end)
                  (error "unterminated parenthesis in regexp" str))
@@ -1574,7 +1581,7 @@
 ;;;; Compilation
 
 (cond-expand
-  (chicken
+  (building-chicken
    (define-syntax cached
      (syntax-rules ()
        ((_ arg fail) (build-cache 5 arg fail)))))
@@ -2007,32 +2014,37 @@
      (else
       #f)))
    (else
-    (let ((matcher (irregex-nfa irx))
-          (str ((chunker-get-str cnk) src))
-          (end ((chunker-get-end cnk) src))
-          (get-next (chunker-get-next cnk))
-          (init (cons src i)))
-      (if (flag-set? (irregex-flags irx) ~searcher?)
-          (matcher cnk init src str i end matches (lambda () #f))
-          (let lp ((src2 src)
-                   (str str)
-                   (i i)
-                   (end end))
-            (cond
-             ((matcher cnk init src2 str i end matches (lambda () #f))
-              (irregex-match-start-chunk-set! matches 0 src2)
-              (irregex-match-start-index-set! matches 0 i)
-              matches)
-             ((< i end)
-              (lp src2 str (+ i 1) end))
-             (else
-              (let ((src2 (get-next src2)))
-                (if src2
-                    (lp src2
-                        ((chunker-get-str cnk) src2)
-                        ((chunker-get-start cnk) src2)
-                        ((chunker-get-end cnk) src2))
-                    #f))))))))))
+    (let ((res (irregex-search/backtrack irx cnk src i matches)))
+      (if res (%irregex-match-fail-set! res #f))
+      res))))
+
+(define (irregex-search/backtrack irx cnk src i matches)
+  (let ((matcher (irregex-nfa irx))
+        (str ((chunker-get-str cnk) src))
+        (end ((chunker-get-end cnk) src))
+        (get-next (chunker-get-next cnk))
+        (init (cons src i)))
+    (if (flag-set? (irregex-flags irx) ~searcher?)
+        (matcher cnk init src str i end matches (lambda () #f))
+        (let lp ((src2 src)
+                 (str str)
+                 (i i)
+                 (end end))
+          (cond
+           ((matcher cnk init src2 str i end matches (lambda () #f))
+            (irregex-match-start-chunk-set! matches 0 src2)
+            (irregex-match-start-index-set! matches 0 i)
+            matches)
+           ((< i end)
+            (lp src2 str (+ i 1) end))
+           (else
+            (let ((src2 (get-next src2)))
+              (if src2
+                  (lp src2
+                      ((chunker-get-str cnk) src2)
+                      ((chunker-get-start cnk) src2)
+                      ((chunker-get-end cnk) src2))
+                  #f))))))))
 
 (define (irregex-match irx str . o)
   (if (not (string? str)) (error "irregex-match: not a string" str))
@@ -2069,12 +2081,21 @@
              (str ((chunker-get-str cnk) src))
              (i ((chunker-get-start cnk) src))
              (end ((chunker-get-end cnk) src))
-             (m (matcher cnk src src str i end matches (lambda () #f))))
-        (and m
-             (not ((chunker-get-next cnk) (%irregex-match-end-chunk m 0)))
-             (= ((chunker-get-end cnk) (%irregex-match-end-chunk m 0))
-                (%irregex-match-end-index m 0))
-             m))))))
+             (init (cons src i)))
+        (let lp ((m (matcher cnk init src str i end matches (lambda () #f))))
+          (and m
+               (cond
+                ((and (not ((chunker-get-next cnk)
+                            (%irregex-match-end-chunk m 0)))
+                      (= ((chunker-get-end cnk)
+                          (%irregex-match-end-chunk m 0))
+                         (%irregex-match-end-index m 0)))
+                 (%irregex-match-fail-set! m #f)
+                 m)
+                ((%irregex-match-fail m)
+                 (lp ((%irregex-match-fail m))))
+                (else
+                 #f)))))))))
 
 (define (irregex-match? . args)
   (and (apply irregex-match args) #t))
@@ -2523,9 +2544,11 @@
                 ;;                            (sre-sequence (cdddar ls)))))
                 ;;             (cdr ls))
                 ;;     n flags next))
-                (($ submatch => submatch-named)
                  ;; ignore submatches altogether
+                (($ submatch)
                  (lp (cons (sre-sequence (cdar ls)) (cdr ls)) n flags next))
+                ((=> submatch-named)
+                 (lp (cons (sre-sequence (cddar ls)) (cdr ls)) n flags next))
                 (else
                  (cond
                   ((assq (caar ls) sre-named-definitions)
@@ -2917,8 +2940,8 @@
      (define-inline (match-vector-set! m i x)
        (vector-set! (internal "##sys#slot" m 1) i x))))
   (else
-   (define match-vector-ref vector-ref)
-   (define match-vector-set! vector-set!)))
+   (define (match-vector-ref v i) (vector-ref v (+ 3 i)))
+   (define (match-vector-set! v i x) (vector-set! v (+ 3 i) x))))
 
 (define (sre-match-extractor sre num-submatches)
   (let* ((tmp (+ num-submatches 1))
@@ -3026,9 +3049,13 @@
              (lambda (cnk start i end j matches)
                (match-once cnk start i end j matches)
                #t)))
-          (($ submatch)
+          (($ submatch => submatch-named)
            (let ((match-one
-                  (lp (sre-sequence (cdr sre)) (+ n 1) #t))
+                  (lp (sre-sequence (if (memq (car sre) '($ submatch))
+                                        (cdr sre)
+                                        (cddr sre)))
+                      (+ n 1)
+                      #t))
                  (start-src-offset (* n 4))
                  (start-index-offset (+ 1 (* n 4)))
                  (end-src-offset (+ 2 (* n 4)))
@@ -3069,6 +3096,7 @@
                    (irregex-match-start-index-set! matches 0 (cdr init))
                    (irregex-match-end-chunk-set! matches 0 src)
                    (irregex-match-end-index-set! matches 0 i)
+		   (%irregex-match-fail-set! matches fail)
                    matches)))
     ;; XXXX this should be inlined
     (define (rec sre) (lp sre n flags next))
diff --git a/tests/scrutiny.expected b/tests/scrutiny.expected
index 9c5c82a8..cd3a5bc4 100644
--- a/tests/scrutiny.expected
+++ b/tests/scrutiny.expected
@@ -1,6 +1,6 @@
 
 Warning: at toplevel:
-  use of deprecated toplevel identifier `current-environment'
+  use of deprecated library procedure `current-environment'
 
 Warning: in local procedure `c',
   in local procedure `b',
diff --git a/types.db b/types.db
index b1a5cb19..e11c436e 100644
--- a/types.db
+++ b/types.db
@@ -537,30 +537,58 @@
 ;; irregex
 
 (irregex (procedure irregex (#!rest) *))
-(string->irregex (procedure string->irregex (string #!rest) *))
-(sre->irregex (procedure sre->irregex (#!rest) *))
-(string->sre (procedure string->sre (string #!rest) *))
-(irregex? (procedure irregex? (*) boolean))
+;irregex-apply-match
+(irregex-dfa (procedure irregex-dfa (*) *))
+(irregex-dfa/extract (procedure irregex-dfa/extract (*) *))
+(irregex-dfa/search (procedure irregex-dfa/search (*) *))
+(irregex-extract (procedure irregex-extract (* string #!optional fixnum fixnum) list))
+(irregex-flags (procedure irregex-flags (*) *))
+(irregex-fold (procedure irregex-fold (* (procedure (fixnum (struct regexp-match)) *) * string #!optional (procedure (fixnum *) *) fixnum fixnum) *))
+(irregex-fold/chunked (procedure irregex-fold/chunked (* (procedure (fixnum (struct regexp-match)) *) * procedure * #!optional (procedure (fixnum *) *) fixnum fixnum) *))
+(irregex-lengths (procedure irregex-lengths (*) *))
+(irregex-match (procedure irregex-match (* string) *))
+;irregex-match?
 (irregex-match-data? (procedure irregex-match-data? (*) boolean))
-(irregex-new-matches (procedure irregex-new-matches (*) *))
-(irregex-reset-matches! (procedure irregex-reset-matches! (*) *))
-(irregex-match-start (procedure irregex-match-start (* #!optional *) *))
 (irregex-match-end (procedure irregex-match-end (* #!optional *) *))
-(irregex-match-substring (procedure irregex-match-substring (* #!optional *) *))
-(irregex-search (procedure irregex-search (* string #!optional fixnum fixnum) *))
-(irregex-search/matches (procedure irregex-search/matches (* string fixnum fixnum *) *))
-(irregex-match (procedure irregex-match (* string) *))
+;irregex-match-end-chunk
+(irregex-match-end-index (procedure irregex-match-end-index ((struct regexp-match) *) fixnum))
+(irregex-match-names (procedure irregex-match-names ((struct regexp-match)) list))
+(irregex-match-num-submatches (procedure irregex-match-num-submatches ((struct regexp-match)) fixnum))
+(irregex-match-start (procedure irregex-match-start (* #!optional *) *))
+;irregex-match-start-chunk
+(irregex-match-start-index (procedure irregex-match-start-index ((struct regexp-match) *) fixnum))
 (irregex-match-string (procedure irregex-match-string (*) *))
+(irregex-match-subchunk (procedure irregex-match-subchunk ((struct regexp-match) #!optional *) *))
+(irregex-match-substring (procedure irregex-match-substring (* #!optional *) *))
+(irregex-match/chunked (procedure irregex-match/chunked (* * * #!optional fixnum) *))
+(irregex-names (procedure irregex-names (*) *))
+(irregex-new-matches (procedure irregex-new-matches (*) *))
+(irregex-nfa (procedure irregex-nfa (*) *))
+(irregex-num-submatches (procedure irregex-num-submatches (*) fixnum))
+(irregex-opt (procedure irregex-opt (list) *))
+(irregex-quote (procedure irregex-quote (string) string))
 (irregex-replace (procedure irregex-replace (* string #!rest) *))
 (irregex-replace/all (procedure irregex-replace/all (* string #!rest) *))
-(irregex-dfa (procedure irregex-dfa (*) *))
-(irregex-dfa/search (procedure irregex-dfa/search (*) *))
-(irregex-dfa/extract (procedure irregex-dfa/extract (*) *))
-(irregex-nfa (procedure irregex-nfa (*) *))
-(irregex-flags (procedure irregex-flags (*) *))
+(irregex-reset-matches! (procedure irregex-reset-matches! (*) *))
+(irregex-search (procedure irregex-search (* string #!optional fixnum fixnum) *))
+(irregex-search/matches (procedure irregex-search/matches (* string fixnum fixnum *) *))
+(irregex-split (procedure irregex-split (* string #!optional fixnum fixnum) list))
 (irregex-submatches (procedure irregex-submatches (*) *))
-(irregex-lengths (procedure irregex-lengths (*) *))
-(irregex-names (procedure irregex-names (*) *))
+(irregex? (procedure irregex? (*) boolean))
+(make-irregex-chunker
+ (procedure make-irregex-chunker 
+	    ((procedure (*) *)
+	     (procedure (*) *)
+	     #!optional
+	     (procedure (*) *)
+	     (procedure (*) *)
+	     (procedure (* fixnum * fixnum) string)
+	     (procedure (* fixnum * fixnum) *))
+	    *))
+(maybe-string->sre (procedure maybe-string->sre (*) *))
+(sre->irregex (procedure sre->irregex (#!rest) *))
+(string->irregex (procedure string->irregex (string #!rest) *))
+(string->sre (procedure string->sre (string #!rest) *))
 
 ;; lolevel
 
Trap