~ chicken-core (chicken-5) 412c4e7a43a2374da42f0738d5970a195c1b59e9
commit 412c4e7a43a2374da42f0738d5970a195c1b59e9
Author: felix <felix@y.(none)>
AuthorDate: Sat Mar 6 23:05:53 2010 +0100
Commit: felix <felix@y.(none)>
CommitDate: Sat Mar 6 23:05:53 2010 +0100
glob->regexp returns regexp, optional sre; does a better job now
diff --git a/manual/Unit regex b/manual/Unit regex
index e68cfc33..2d0c249e 100644
--- a/manual/Unit regex
+++ b/manual/Unit regex
@@ -39,7 +39,7 @@ matching. {{ACCESSOR}} defaults to the identity function.
=== glob->regexp
-<procedure>(glob->regexp PATTERN)</procedure>
+<procedure>(glob->regexp PATTERN [SRE?])</procedure>
Converts the file-pattern {{PATTERN}} into a regular expression.
@@ -56,6 +56,10 @@ Converts the file-pattern {{PATTERN}} into a regular expression.
[-C...]
?
+{{glob->regexp}} returns a regular expression object if the optional
+argument {{SRE?}} is false or not given, otherwise the SRE of the
+computed regular expression is returned.
+
=== regexp
diff --git a/regex.scm b/regex.scm
index 277b452e..7a3ebeb6 100644
--- a/regex.scm
+++ b/regex.scm
@@ -247,37 +247,44 @@
;;; Glob support:
(define glob->regexp
- (let ([list->string list->string]
- [string->list string->list] )
- (lambda (s)
+ (let ((list->string list->string)
+ (string->list string->list)
+ (regexp regexp))
+ (lambda (s #!optional sre?)
(##sys#check-string s 'glob->regexp)
- (list->string
- (let loop ((cs (string->list s)))
- (if (null? cs)
- '()
- (let ([c (car cs)]
- [rest (cdr cs)] )
- (cond [(char=? c #\*) `(#\. #\* ,@(loop rest))]
- [(char=? c #\?) (cons '#\. (loop rest))]
- [(char=? c #\[)
- (cons
- #\[
- (let loop2 ((rest rest))
- (if (pair? rest)
- (cond ((char=? #\] (car rest))
- (cons #\] (loop (cdr rest))))
- ((and (char=? #\- (car rest)) (pair? (cdr rest)))
- `(#\- ,(cadr rest) ,@(loop2 (cddr rest))))
- ((and (pair? (cdr rest)) (pair? (cddr rest))
- (char=? #\- (cadr rest)) )
- `(,(car rest) #\- ,(caddr rest)
- ,@(loop2 (cdddr rest))))
- ((pair? rest)
- (cons (car rest) (loop2 (cdr rest))))
- ((null? rest)
- (error 'glob->regexp "unexpected end of character class" s))))))]
- [(or (char-alphabetic? c) (char-numeric? c)) (cons c (loop rest))]
- [else `(#\\ ,c ,@(loop rest))] ) ) ) ) ) ) ) )
+ (let ((sre
+ (cons
+ ':
+ (let loop ((cs (string->list s)) (dir #t))
+ (if (null? cs)
+ '()
+ (let ((c (car cs))
+ (rest (cdr cs)) )
+ (cond ((char=? c #\*)
+ (if dir
+ `((or (: (~ ("./\\")) (* (~ ("/\\"))))
+ (* (~ ("./\\"))))
+ ,@(loop rest #f))
+ `((* (~ ("/\\"))) ,@(loop rest #f))))
+ ((char=? c #\?) (cons 'any (loop rest #f)))
+ ((char=? c #\[)
+ (let loop2 ((rest rest) (s '()))
+ (cond ((not (pair? rest))
+ (error 'glob->regexp "unexpected end of character class" s))
+ ((char=? #\] (car rest))
+ `((or ,@s) ,@(loop (cdr rest) #f)))
+ ((and (pair? (cdr rest))
+ (pair? (cddr rest))
+ (char=? #\- (cadr rest)) )
+ (loop2 (cdddr rest) (cons `(/ ,(car rest) ,(caddr rest)) s)))
+ ((and (pair? (cdr rest))
+ (char=? #\- (car rest)))
+ (loop2 (cddr rest)
+ (cons `(~ ,(cadr rest)) s)))
+ (else
+ (loop2 (cdr rest) (cons (car rest) s))))))
+ (else (cons c (loop rest (memq c '(#\\ #\/))))))))))))
+ (if sre? sre (regexp sre))))))
;;; Grep-like function on list:
diff --git a/tests/runtests.sh b/tests/runtests.sh
index 91791e35..f14acc3d 100644
--- a/tests/runtests.sh
+++ b/tests/runtests.sh
@@ -176,6 +176,7 @@ $compile posix-tests.scm
echo "======================================== regular expression tests ..."
$interpret -bnq test-irregex.scm
+$interpret -bnq test-glob.scm
echo "======================================== r4rstest ..."
echo "(expect mult-float-print-test to fail)"
diff --git a/tests/test-glob.scm b/tests/test-glob.scm
new file mode 100644
index 00000000..c7dad78d
--- /dev/null
+++ b/tests/test-glob.scm
@@ -0,0 +1,17 @@
+;;;; test-glob.scm - test glob-pattern -> regex translation
+
+
+(use regex)
+
+(assert (string-match (glob->regexp "foo.bar") "foo.bar"))
+(assert (string-match (glob->regexp "foo*") "foo.bar"))
+(assert (string-match (glob->regexp "foo/*") "foo/bar"))
+(assert (not (string-match (glob->regexp "foo/*") "foo/.bar")))
+(assert (not (string-match (glob->regexp "*foo") ".foo")))
+(assert (not (string-match (glob->regexp "*foo*") "a.fooxxx/yyy")))
+(assert (string-match (glob->regexp "*foo*") "fooxxx"))
+(assert (string-match (glob->regexp "main.[ch]") "main.c"))
+(assert (string-match (glob->regexp "main.[ch]") "main.h"))
+(assert (not (string-match (glob->regexp "main.[ch]") "main.cpp")))
+(assert (string-match (glob->regexp "main.[-c]") "main.h"))
+(assert (not (string-match (glob->regexp "main.[-h]") "main.h")))
diff --git a/types.db b/types.db
index 22db47cb..1ebc4004 100644
--- a/types.db
+++ b/types.db
@@ -889,7 +889,7 @@
;; regex
-(glob->regexp (procedure glob->regexp (string) string))
+(glob->regexp (procedure glob->regexp (string #!optional *) *))
(glob? deprecated)
(grep (procedure grep (* list #!optional (procedure (*) *)) list))
(regexp (procedure regexp (* #!optional * * *) (struct regexp)))
Trap