~ chicken-core (chicken-5) 2ef671b6d4f3266815f7795b0d81d1d579f2056b


commit 2ef671b6d4f3266815f7795b0d81d1d579f2056b
Author:     Peter Bex <peter.bex@xs4all.nl>
AuthorDate: Sun Jun 22 18:02:33 2014 +0200
Commit:     Mario Domenech Goulart <mario.goulart@gmail.com>
CommitDate: Tue Jul 1 09:49:15 2014 -0300

    Fix #878 which was indeed a bug, caused by an incorrect hand-rolled optimisation.
    
    This adds some more "integration" test cases so that we can verify
    more easily that the combination of make-kmp-restart-vector and
    kmp-step is looping through the pattern correctly as it advances
    through the search string.
    
    The optimisation was due to a mistaken reading of the reference
    implementation: the pattern was indexed as pat[k+1] at the j=-1 case
    and as pat[k] at the pat[k]=pat[j+start] case, but the optimisation
    changed the code to use pat[k] in both cases.
    
    Signed-off-by: Mario Domenech Goulart <mario.goulart@gmail.com>

diff --git a/NEWS b/NEWS
index c8ce4276..0d62ee04 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,7 @@
      require extras but use procedures from it.
   - SRFI-13: fix string-copy! in cases source and destination strings'
     memory areas overlap (#1135).
+  - Fixed bug in make-kmp-restart-vector from SRFI-13.
 
 - Unit lolevel:
   - Restore long-lost but still documented "vector-like?" procedure (#983)
diff --git a/srfi-13.scm b/srfi-13.scm
index 65b748f9..dec54b27 100644
--- a/srfi-13.scm
+++ b/srfi-13.scm
@@ -1404,35 +1404,36 @@
                   ((c= char=?) rest) ; (procedure? c=))
      (receive (rest2 start end) (string-parse-start+end make-kmp-restart-vector pattern rest)
        (let* ((rvlen (- end start))
-	   (rv (make-vector rvlen -1)))
+	      (rv (make-vector rvlen -1)))
       (if (> rvlen 0)
 	  (let ((rvlen-1 (- rvlen 1))
 		(c0 (string-ref pattern start)))
 
 	    ;; Here's the main loop. We have set rv[0] ... rv[i].
 	    ;; K = I + START -- it is the corresponding index into PATTERN.
-	    (let lp1 ((i 0) (j -1) (k start))	
+	    (let lp1 ((i 0) (j -1) (k start))
 	      (if (< i rvlen-1)
 
-		  (let ((ck (string-ref pattern k)))
-		    ;; lp2 invariant:
-		    ;;   pat[(k-j) .. k-1] matches pat[start .. start+j-1]
-		    ;;   or j = -1.
-		    (let lp2 ((j j))
-
-		      (cond ((= j -1)
-			     (let ((i1 (+ i 1)))
-			       (vector-set! rv i1 (if (c= ck c0) -1 0))
-			       (lp1 i1 0 (+ k 1))))
-
-			    ;; pat[(k-j) .. k] matches pat[start..start+j].
-			    ((c= ck (string-ref pattern (+ j start)))
-			     (let* ((i1 (+ 1 i))
-				    (j1 (+ 1 j)))
-			       (vector-set! rv i1 j1)
-			       (lp1 i1 j1 (+ k 1))))
-
-			    (else (lp2 (vector-ref rv j))))))))))
+		  ;; lp2 invariant:
+		  ;;   pat[(k-j) .. k-1] matches pat[start .. start+j-1]
+		  ;;   or j = -1.
+		  (let lp2 ((j j))
+
+		    (cond ((= j -1)
+			   (let ((i1 (+ i 1))
+				 (ck+1 (string-ref pattern (add1 k))))
+			     (vector-set! rv i1 (if (c= ck+1 c0) -1 0))
+			     (lp1 i1 0 (+ k 1))))
+
+			  ;; pat[(k-j) .. k] matches pat[start..start+j].
+			  ((c= (string-ref pattern k)
+			       (string-ref pattern (+ j start)))
+			   (let* ((i1 (+ 1 i))
+				  (j1 (+ 1 j)))
+			     (vector-set! rv i1 j1)
+			     (lp1 i1 j1 (+ k 1))))
+
+			  (else (lp2 (vector-ref rv j)))))))))
       rv))))
 
 
diff --git a/tests/srfi-13-tests.scm b/tests/srfi-13-tests.scm
index 8df378ac..bc32885b 100644
--- a/tests/srfi-13-tests.scm
+++ b/tests/srfi-13-tests.scm
@@ -619,17 +619,65 @@
 
 (test "make-kmp-restart-vector" '#(-1) (make-kmp-restart-vector "a"))
 
-;;; The following two tests for make-kmp-restart-vector are
-;;; intentionally commented (see http://bugs.call-cc.org/ticket/878)
-;;; -- mario
-
-; This seems right to me, but is it?
-; (test "make-kmp-restart-vector" '#(-1 0) (make-kmp-restart-vector "ab"))
-
-; The following is from an example in the code, but I expect it is not right.
-; (test "make-kmp-restart-vector" '#(-1 0 0 -1 1 2) (make-kmp-restart-vector "abdabx"))
-
-
+(test "make-kmp-restart-vector" '#(-1 0) (make-kmp-restart-vector "ab"))
+
+; The following is from an example in the code.  It is the "optimised"
+; version; it's also valid to return #(-1 0 0 0 1 2), but that will
+; needlessly check the "a" twice before giving up.
+(test "make-kmp-restart-vector"
+      '#(-1 0 0 -1 1 2)
+      (make-kmp-restart-vector "abdabx"))
+
+;; Each entry in kmp-cases is a pattern, a string to match against and
+;; the expected run of the algorithm through the positions in the
+;; pattern.  So for example 0 1 2 means it looks at position 0 first,
+;; then at 1 and then at 2.
+;;
+;; This is easy to verify in simple cases; If there's a shared
+;; substring and matching fails, you try matching again starting at
+;; the end of the shared substring, otherwise you rewind.  For more
+;; complex cases, it's increasingly difficult for humans to verify :)
+(define kmp-cases
+  '(("abc" "xx" #f 0 0)
+    ("abc" "abc" #t 0 1 2)
+    ("abcd" "abc" #f 0 1 2)
+    ("abc" "abcd" #t 0 1 2)
+    ("abc" "aabc" #t 0 1 1 2)
+    ("ab" "aa" #f 0 1)
+    ("ab" "aab" #t 0 1 1)
+    ("abdabx" "abdbbabda" #f 0 1 2 3 0 0 1 2 3)
+    ("aabc" "axaabc" #t 0 1 0 1 2 3)
+    ("aabac" "aabaabac" #t 0 1 2 3 4 2 3 4)))
+
+(for-each
+ (lambda (test-case)
+   (let* ((pat (car test-case))
+	  (n (string-length pat))
+	  (str (cadr test-case))
+          (match? (caddr test-case))
+	  (steps (cdddr test-case))
+	  (rv (make-kmp-restart-vector pat)))
+     (call-with-input-string
+      str
+      (lambda (p)
+	(let lp ((i 0)
+		 (step 0)
+		 (steps steps))
+	  (cond
+	   ((or (= i n) (eof-object? (peek-char p)))
+	    (test-assert (sprintf "KMP match? ~S, case: ~S" match? test-case)
+			 (eq? (= i n) match?))
+	    (test-assert (sprintf "KMP empty remaining steps: ~S, case: ~S"
+			   steps test-case)
+			 (null? steps)))
+	   (else
+	    (let ((new-i (kmp-step pat rv (read-char p) i char=? 0))
+		  (expected-i (and (not (null? steps)) (car steps))))
+	      (test (sprintf "KMP step ~S (exp: ~S, act: ~S), case: ~S"
+		      step expected-i i test-case)
+		    expected-i i)
+	      (lp new-i (add1 step) (cdr steps))))))))))
+ kmp-cases)
 
 ; FIXME!  Implement tests for these:
 ;   string-kmp-partial-search
Trap