~ chicken-core (chicken-5) 5fda7f66ffed85de9e32ccaf6408fd6487e5e3a8


commit 5fda7f66ffed85de9e32ccaf6408fd6487e5e3a8
Author:     felix <bunny351@gmail.com>
AuthorDate: Wed May 12 16:32:24 2010 +0200
Commit:     felix <bunny351@gmail.com>
CommitDate: Wed May 12 16:32:24 2010 +0200

    added benchmark test for regular expression caching

diff --git a/tests/sgrep.scm b/tests/sgrep.scm
new file mode 100644
index 00000000..b88890c2
--- /dev/null
+++ b/tests/sgrep.scm
@@ -0,0 +1,143 @@
+;;;; sgrep.scm - grepping benchmark
+
+
+(use regex extras utils posix srfi-1)
+
+
+(define big-string
+  (read-all (optional (command-line-arguments) "compiler.scm")))
+
+;; hack for missing safe variants of fxmod
+
+(define-compiler-syntax fxmod
+  (syntax-rules ()
+    ((_ x y) (##core#inline "C_fixnum_modulo" x y))))
+
+(define-syntax bgrep
+  (syntax-rules ()
+    ((_ n expr)
+     (time
+      (do ((i n (fx- i 1)))
+	  ((eq? i 0))
+	(with-input-from-string big-string
+	  (lambda ()
+	    (let ((h 0)
+		  (c 0))
+	      (scan-input-lines
+	       (lambda (line)
+		 (set! c (fx+ c 1))
+		 (when (zero? (fxmod c 100)) (print* "."))
+		 (when (string-search expr line)
+		   (set! h (fx+ h 1)))
+		 #f))
+	      (newline)
+	      h))))))))
+
+(define the-cache)
+
+(define-syntax (build-cache x r c)
+  ;; (build-cache N ARG FAIL) 
+  (let* ((n (cadr x))
+	 (n2 (* n 2))
+	 (arg (caddr x))
+	 (fail (cadddr x))
+	 (%cache (r 'cache))
+	 (%index (r 'index))
+	 (%arg (r 'arg))
+	 (%let (r 'let))
+	 (%let* (r 'let*))
+	 (%if (r 'if))
+	 (%fx+ (r 'fx+))
+	 (%fxmod (r 'fxmod))
+	 (%equal? (r 'equal?))
+	 (%quote (r 'quote))
+	 (%tmp (r 'tmp))
+	 (%begin (r 'begin))
+	 (cache (make-vector (add1 n2) #f)))
+    (vector-set! cache n2 0)
+    `(,%let* ((,%cache (,%quote ,cache))
+	      (,%arg ,arg))
+	     (set! the-cache ,%cache)	;XXX
+	     ,(let fold ((i 0))
+		(if (>= i n)
+		    `(,%let ((,%tmp ,fail)
+			     (,%index (##sys#slot ,%cache ,n2)))
+			    (##sys#setslot ,%cache ,%index ,%arg)
+			    (##sys#setslot ,%cache (,%fx+ ,%index 1) ,%tmp)
+			    (##sys#setislot ,%cache ,n2 (,%fxmod (,%fx+ ,%index 2) ,n2))
+			    ,%tmp)
+		    `(,%if (,%equal? (##sys#slot ,%cache ,(* i 2)) ,%arg)
+			   (##sys#slot ,%cache ,(add1 (* i 2)))
+			   ,(fold (add1 i))))))))
+
+(define-syntax rx1
+  (syntax-rules ()
+    ((_) "\\((.*), (.*)\\)")))
+
+(define-syntax rx2
+  (syntax-rules ()
+    ((_) '(: #\( (submatch (* any)) ", " (submatch (* any))))))
+
+(define (regexp2 rx)
+  (build-cache 
+   5 rx
+   (regexp rx)))
+
+#|
+;; slow
+(print "baseline/literal")
+(bgrep 1 (rx1))
+
+(print "baseline/literal (SRE)")
+(bgrep 1 (rx2))
+|#
+
+(print "baseline/precompiled")
+(define rx (regexp (rx1)))
+(bgrep 1 rx)
+
+(print "test cache fill")
+(do ((lst (list-tabulate 10 number->string) (cdr lst)))
+    ((null? lst))
+  (assert (string-match (regexp2 (car lst)) (car lst))))
+(print the-cache)
+
+(print "cached/literal")
+(bgrep 1 (regexp2 (rx1)))
+
+(print the-cache)
+
+(print "cached/literal (SRE)")
+(bgrep 1 (regexp2 (rx2)))
+
+(print the-cache)
+
+(define-compiler-syntax (string-search x r c)
+  (let ((%quote (r 'quote))
+	(%let (r 'let))
+	(%string-search (r 'string-search))
+	(%regexp (r 'regexp))
+	(%or (r 'or))
+	(%let* (r 'let*)))
+    (let ((rx (cadr x)))
+      (if (or (string? rx) 
+	      (and (pair? rx) (c (car rx) %quote)))
+	  (let ((cache (vector #f))
+		(%cache (r 'cache))
+		(%tmp (r 'tmp)))
+	    `(,%let* ((,%cache (,%quote ,cache))
+		      (,%tmp (##sys#slot ,%cache 0)))
+		     (,%string-search
+		      (,%or ,%tmp
+			    (,%let ((,%tmp (,%regexp ,rx)))
+				   (##sys#setslot ,%cache 0 ,%tmp)
+				   ,%tmp))
+		      ,@(cddr x))))
+	  x))))
+
+(print "inline cached/literal")
+(bgrep 1 "\\((.*), (.*)\\)")
+(print "inline cached/literal (SRE)")
+(bgrep 1 '(: #\( (submatch (* any)) ", " (submatch (* any))))
+
+
Trap