~ chicken-core (chicken-5) 412c4e7a43a2374da42f0738d5970a195c1b59e9


commit 412c4e7a43a2374da42f0738d5970a195c1b59e9
Author:     felix <felix@y.(none)>
AuthorDate: Sat Mar 6 23:05:53 2010 +0100
Commit:     felix <felix@y.(none)>
CommitDate: Sat Mar 6 23:05:53 2010 +0100

    glob->regexp returns regexp, optional sre; does a better job now

diff --git a/manual/Unit regex b/manual/Unit regex
index e68cfc33..2d0c249e 100644
--- a/manual/Unit regex	
+++ b/manual/Unit regex	
@@ -39,7 +39,7 @@ matching. {{ACCESSOR}} defaults to the identity function.
 
 === glob->regexp
 
-<procedure>(glob->regexp PATTERN)</procedure>
+<procedure>(glob->regexp PATTERN [SRE?])</procedure>
 
 Converts the file-pattern {{PATTERN}} into a regular expression.
 
@@ -56,6 +56,10 @@ Converts the file-pattern {{PATTERN}} into a regular expression.
  [-C...]
  ?
 
+{{glob->regexp}} returns a regular expression object if the optional
+argument {{SRE?}} is false or not given, otherwise the SRE of the
+computed regular expression is returned.
+
 
 === regexp
 
diff --git a/regex.scm b/regex.scm
index 277b452e..7a3ebeb6 100644
--- a/regex.scm
+++ b/regex.scm
@@ -247,37 +247,44 @@
 ;;; Glob support:
 
 (define glob->regexp
-  (let ([list->string list->string]
-        [string->list string->list] )
-    (lambda (s)
+  (let ((list->string list->string)
+        (string->list string->list)
+	(regexp regexp))
+    (lambda (s #!optional sre?)
       (##sys#check-string s 'glob->regexp)
-      (list->string
-       (let loop ((cs (string->list s)))
-         (if (null? cs)
-             '()
-             (let ([c (car cs)]
-                   [rest (cdr cs)] )
-               (cond [(char=? c #\*)  `(#\. #\* ,@(loop rest))]
-                     [(char=? c #\?)  (cons '#\. (loop rest))]
-                     [(char=? c #\[)
-                      (cons
-                       #\[
-                       (let loop2 ((rest rest))
-                         (if (pair? rest)
-			     (cond ((char=? #\] (car rest))
-				    (cons #\] (loop (cdr rest))))
-				   ((and (char=? #\- (car rest)) (pair? (cdr rest)))
-				    `(#\- ,(cadr rest) ,@(loop2 (cddr rest))))
-				   ((and (pair? (cdr rest)) (pair? (cddr rest))
-					 (char=? #\- (cadr rest)) )
-				    `(,(car rest) #\- ,(caddr rest)
-				      ,@(loop2 (cdddr rest))))
-				   ((pair? rest)
-				    (cons (car rest) (loop2 (cdr rest))))
-				   ((null? rest)
-				    (error 'glob->regexp "unexpected end of character class" s))))))]
-                     [(or (char-alphabetic? c) (char-numeric? c)) (cons c (loop rest))]
-                     [else `(#\\ ,c ,@(loop rest))] ) ) ) ) ) ) ) )
+      (let ((sre
+	     (cons 
+	      ':
+	      (let loop ((cs (string->list s)) (dir #t))
+		(if (null? cs)
+		    '()
+		    (let ((c (car cs))
+			  (rest (cdr cs)) )
+		      (cond ((char=? c #\*) 
+			     (if dir
+				 `((or (: (~ ("./\\")) (* (~ ("/\\"))))
+				       (* (~ ("./\\"))))
+				   ,@(loop rest #f))
+				 `((* (~ ("/\\"))) ,@(loop rest #f))))
+			    ((char=? c #\?)  (cons 'any (loop rest #f)))
+			    ((char=? c #\[)
+			     (let loop2 ((rest rest) (s '()))
+			       (cond ((not (pair? rest))
+				      (error 'glob->regexp "unexpected end of character class" s))
+				     ((char=? #\] (car rest))
+				      `((or ,@s) ,@(loop (cdr rest) #f)))
+				     ((and (pair? (cdr rest))
+					   (pair? (cddr rest))
+					   (char=? #\- (cadr rest)) )
+				      (loop2 (cdddr rest) (cons `(/ ,(car rest) ,(caddr rest)) s)))
+				     ((and (pair? (cdr rest))
+					   (char=? #\- (car rest)))
+				      (loop2 (cddr rest)
+					     (cons `(~ ,(cadr rest)) s)))
+				     (else
+				      (loop2 (cdr rest) (cons (car rest) s))))))
+			    (else (cons c (loop rest (memq c '(#\\ #\/))))))))))))
+	(if sre? sre (regexp sre))))))
 
 
 ;;; Grep-like function on list:
diff --git a/tests/runtests.sh b/tests/runtests.sh
index 91791e35..f14acc3d 100644
--- a/tests/runtests.sh
+++ b/tests/runtests.sh
@@ -176,6 +176,7 @@ $compile posix-tests.scm
 
 echo "======================================== regular expression tests ..."
 $interpret -bnq test-irregex.scm
+$interpret -bnq test-glob.scm
 
 echo "======================================== r4rstest ..."
 echo "(expect mult-float-print-test to fail)"
diff --git a/tests/test-glob.scm b/tests/test-glob.scm
new file mode 100644
index 00000000..c7dad78d
--- /dev/null
+++ b/tests/test-glob.scm
@@ -0,0 +1,17 @@
+;;;; test-glob.scm - test glob-pattern -> regex translation
+
+
+(use regex)
+
+(assert (string-match (glob->regexp "foo.bar") "foo.bar"))
+(assert (string-match (glob->regexp "foo*") "foo.bar"))
+(assert (string-match (glob->regexp "foo/*") "foo/bar"))
+(assert (not (string-match (glob->regexp "foo/*") "foo/.bar")))
+(assert (not (string-match (glob->regexp "*foo") ".foo")))
+(assert (not (string-match (glob->regexp "*foo*") "a.fooxxx/yyy")))
+(assert (string-match (glob->regexp "*foo*") "fooxxx"))
+(assert (string-match (glob->regexp "main.[ch]") "main.c"))
+(assert (string-match (glob->regexp "main.[ch]") "main.h"))
+(assert (not (string-match (glob->regexp "main.[ch]") "main.cpp")))
+(assert (string-match (glob->regexp "main.[-c]") "main.h"))
+(assert (not (string-match (glob->regexp "main.[-h]") "main.h")))
diff --git a/types.db b/types.db
index 22db47cb..1ebc4004 100644
--- a/types.db
+++ b/types.db
@@ -889,7 +889,7 @@
 
 ;; regex
 
-(glob->regexp (procedure glob->regexp (string) string))
+(glob->regexp (procedure glob->regexp (string #!optional *) *))
 (glob? deprecated)
 (grep (procedure grep (* list #!optional (procedure (*) *)) list))
 (regexp (procedure regexp (* #!optional * * *) (struct regexp)))
Trap