~ chicken-core (chicken-5) 5fda7f66ffed85de9e32ccaf6408fd6487e5e3a8
commit 5fda7f66ffed85de9e32ccaf6408fd6487e5e3a8
Author: felix <bunny351@gmail.com>
AuthorDate: Wed May 12 16:32:24 2010 +0200
Commit: felix <bunny351@gmail.com>
CommitDate: Wed May 12 16:32:24 2010 +0200
added benchmark test for regular expression caching
diff --git a/tests/sgrep.scm b/tests/sgrep.scm
new file mode 100644
index 00000000..b88890c2
--- /dev/null
+++ b/tests/sgrep.scm
@@ -0,0 +1,143 @@
+;;;; sgrep.scm - grepping benchmark
+
+
+(use regex extras utils posix srfi-1)
+
+
+(define big-string
+ (read-all (optional (command-line-arguments) "compiler.scm")))
+
+;; hack for missing safe variants of fxmod
+
+(define-compiler-syntax fxmod
+ (syntax-rules ()
+ ((_ x y) (##core#inline "C_fixnum_modulo" x y))))
+
+(define-syntax bgrep
+ (syntax-rules ()
+ ((_ n expr)
+ (time
+ (do ((i n (fx- i 1)))
+ ((eq? i 0))
+ (with-input-from-string big-string
+ (lambda ()
+ (let ((h 0)
+ (c 0))
+ (scan-input-lines
+ (lambda (line)
+ (set! c (fx+ c 1))
+ (when (zero? (fxmod c 100)) (print* "."))
+ (when (string-search expr line)
+ (set! h (fx+ h 1)))
+ #f))
+ (newline)
+ h))))))))
+
+(define the-cache)
+
+(define-syntax (build-cache x r c)
+ ;; (build-cache N ARG FAIL)
+ (let* ((n (cadr x))
+ (n2 (* n 2))
+ (arg (caddr x))
+ (fail (cadddr x))
+ (%cache (r 'cache))
+ (%index (r 'index))
+ (%arg (r 'arg))
+ (%let (r 'let))
+ (%let* (r 'let*))
+ (%if (r 'if))
+ (%fx+ (r 'fx+))
+ (%fxmod (r 'fxmod))
+ (%equal? (r 'equal?))
+ (%quote (r 'quote))
+ (%tmp (r 'tmp))
+ (%begin (r 'begin))
+ (cache (make-vector (add1 n2) #f)))
+ (vector-set! cache n2 0)
+ `(,%let* ((,%cache (,%quote ,cache))
+ (,%arg ,arg))
+ (set! the-cache ,%cache) ;XXX
+ ,(let fold ((i 0))
+ (if (>= i n)
+ `(,%let ((,%tmp ,fail)
+ (,%index (##sys#slot ,%cache ,n2)))
+ (##sys#setslot ,%cache ,%index ,%arg)
+ (##sys#setslot ,%cache (,%fx+ ,%index 1) ,%tmp)
+ (##sys#setislot ,%cache ,n2 (,%fxmod (,%fx+ ,%index 2) ,n2))
+ ,%tmp)
+ `(,%if (,%equal? (##sys#slot ,%cache ,(* i 2)) ,%arg)
+ (##sys#slot ,%cache ,(add1 (* i 2)))
+ ,(fold (add1 i))))))))
+
+(define-syntax rx1
+ (syntax-rules ()
+ ((_) "\\((.*), (.*)\\)")))
+
+(define-syntax rx2
+ (syntax-rules ()
+ ((_) '(: #\( (submatch (* any)) ", " (submatch (* any))))))
+
+(define (regexp2 rx)
+ (build-cache
+ 5 rx
+ (regexp rx)))
+
+#|
+;; slow
+(print "baseline/literal")
+(bgrep 1 (rx1))
+
+(print "baseline/literal (SRE)")
+(bgrep 1 (rx2))
+|#
+
+(print "baseline/precompiled")
+(define rx (regexp (rx1)))
+(bgrep 1 rx)
+
+(print "test cache fill")
+(do ((lst (list-tabulate 10 number->string) (cdr lst)))
+ ((null? lst))
+ (assert (string-match (regexp2 (car lst)) (car lst))))
+(print the-cache)
+
+(print "cached/literal")
+(bgrep 1 (regexp2 (rx1)))
+
+(print the-cache)
+
+(print "cached/literal (SRE)")
+(bgrep 1 (regexp2 (rx2)))
+
+(print the-cache)
+
+(define-compiler-syntax (string-search x r c)
+ (let ((%quote (r 'quote))
+ (%let (r 'let))
+ (%string-search (r 'string-search))
+ (%regexp (r 'regexp))
+ (%or (r 'or))
+ (%let* (r 'let*)))
+ (let ((rx (cadr x)))
+ (if (or (string? rx)
+ (and (pair? rx) (c (car rx) %quote)))
+ (let ((cache (vector #f))
+ (%cache (r 'cache))
+ (%tmp (r 'tmp)))
+ `(,%let* ((,%cache (,%quote ,cache))
+ (,%tmp (##sys#slot ,%cache 0)))
+ (,%string-search
+ (,%or ,%tmp
+ (,%let ((,%tmp (,%regexp ,rx)))
+ (##sys#setslot ,%cache 0 ,%tmp)
+ ,%tmp))
+ ,@(cddr x))))
+ x))))
+
+(print "inline cached/literal")
+(bgrep 1 "\\((.*), (.*)\\)")
+(print "inline cached/literal (SRE)")
+(bgrep 1 '(: #\( (submatch (* any)) ", " (submatch (* any))))
+
+
Trap