~ chicken-core (chicken-5) 4a3cec73c9a723bcf5ea656c9599a3b793cb6ecf


commit 4a3cec73c9a723bcf5ea656c9599a3b793cb6ecf
Author:     felix <felix@call-with-current-continuation.org>
AuthorDate: Thu Jul 15 15:08:03 2010 +0200
Commit:     felix <felix@call-with-current-continuation.org>
CommitDate: Tue Jul 27 13:09:33 2010 +0200

    added irregex manual (thanks to sjamaan) and made various changes for completing irregex promotion

diff --git a/distribution/manifest b/distribution/manifest
index c0687166..2f3e2898 100644
--- a/distribution/manifest
+++ b/distribution/manifest
@@ -29,6 +29,7 @@ compiler-syntax.c
 scrutinizer.c
 unboxing.c
 regex.c
+irregex.c
 posixunix.c
 posixwin.c
 profiler.c
@@ -79,6 +80,7 @@ scrutinizer.scm
 unboxing.scm
 regex.scm
 irregex.scm
+irregex-core.scm
 posixunix.scm
 posixwin.scm
 posix-common.scm
@@ -290,6 +292,7 @@ manual/Unit lolevel
 manual/Unit ports
 manual/Unit posix
 manual/Unit regex
+manual/Unit irregex
 manual/Unit srfi-1
 manual/Unit srfi-13
 manual/Unit srfi-14
diff --git a/files.scm b/files.scm
index 2c1c167f..c0a47c66 100644
--- a/files.scm
+++ b/files.scm
@@ -277,10 +277,10 @@ EOF
         (##sys#check-string pn 'decompose-pathname)
         (if (fx= 0 (##sys#size pn))
 	    (values #f #f #f)
-	    (let ([ms (string-match rx1 pn)])
+	    (let ([ms (string-search rx1 pn)])
 	      (if ms
 		  (values (strip-pds (cadr ms)) (caddr ms) (car (cddddr ms)))
-		  (let ([ms (string-match rx2 pn)])
+		  (let ([ms (string-search rx2 pn)])
 		    (if ms
 		        (values (strip-pds (cadr ms)) (caddr ms) #f)
 		        (values (strip-pds pn) #f #f) ) ) ) ) ) ) ) ) )
diff --git a/irregex-core.scm b/irregex-core.scm
index 7fc539b4..ed3be22d 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -3635,6 +3635,7 @@
                   (lp end-src end-index acc))))))))
 
 (define (irregex-fold/chunked irx kons . args)
+  (if (not (procedure? kons)) (error "irregex-fold/chunked: not a procedure" kons))
   (let ((kons2 (lambda (s i m acc) (kons s i (irregex-copy-matches m) acc))))
     (apply irregex-fold/chunked/fast irx kons2 args)))
 
diff --git a/irregex.import.scm b/irregex.import.scm
index 71f406c9..53e001d0 100644
--- a/irregex.import.scm
+++ b/irregex.import.scm
@@ -31,28 +31,43 @@
    irregex-dfa 
    irregex-dfa/extract
    irregex-dfa/search
+   irregex-extract
    irregex-flags
    irregex-fold
+   irregex-fold/chunked
    irregex-lengths
    irregex-match
+   irregex-match?
    irregex-match-data?
    irregex-match-end
+   irregex-match-end-chunk
    irregex-match-end-index
+   irregex-match-names
    irregex-match-num-submatches
    irregex-match-start
+   irregex-match-start-chunk
    irregex-match-start-index
    irregex-match-string
+   irregex-match-subchunk
    irregex-match-substring
+   irregex-match/chunked
    irregex-names
    irregex-new-matches
    irregex-nfa
+   irregex-num-submatches
+   irregex-opt
+   irregex-quote
    irregex-replace
    irregex-replace/all
    irregex-reset-matches!
    irregex-search
+   irregex-search/chunked
    irregex-search/matches
+   irregex-split
    irregex-submatches
    irregex?
+   make-irregex-chunker
+   maybe-string->sre
    sre->irregex
    string->irregex 
    string->sre
diff --git a/irregex.scm b/irregex.scm
new file mode 100644
index 00000000..bd37aba9
--- /dev/null
+++ b/irregex.scm
@@ -0,0 +1,87 @@
+;;;; irregex.scm - container for irregex-core.scm
+;
+; Copyright (c) 2010, The Chicken Team
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following
+; conditions are met:
+;
+;   Redistributions of source code must retain the above copyright notice, this list of conditions and the following
+;     disclaimer.
+;   Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
+;     disclaimer in the documentation and/or other materials provided with the distribution.
+;   Neither the name of the author nor the names of its contributors may be used to endorse or promote
+;     products derived from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
+; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+; AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+; POSSIBILITY OF SUCH DAMAGE.
+
+
+
+(declare (unit irregex))
+
+(declare
+  (disable-interrupts)
+  (no-procedure-checks)
+  (fixnum)
+  (export
+   irregex
+   irregex-apply-match
+   irregex-dfa
+   irregex-dfa/extract
+   irregex-dfa/search
+   irregex-extract
+   irregex-flags
+   irregex-fold
+   irregex-fold/chunked
+   irregex-lengths
+   irregex-match
+   irregex-match?
+   irregex-match-data?
+   irregex-match-end
+   irregex-match-end-chunk
+   irregex-match-end-index
+   irregex-match-names
+   irregex-match-num-submatches
+   irregex-match-start
+   irregex-match-start-chunk
+   irregex-match-start-index
+   irregex-match-string
+   irregex-match-subchunk
+   irregex-match-substring
+   irregex-match/chunked
+   irregex-names
+   irregex-new-matches
+   irregex-nfa
+   irregex-num-submatches
+   irregex-opt
+   irregex-quote
+   irregex-replace
+   irregex-replace/all
+   irregex-reset-matches!
+   irregex-search
+   irregex-search/chunked
+   irregex-search/matches
+   irregex-split
+   irregex-submatches
+   irregex? 
+   make-irregex-chunker
+   maybe-string->sre
+   irregex-search/chunked
+   sre->irregex
+   string->irregex
+   string->sre
+   ))
+
+(include "common-declarations.scm")
+
+(register-feature! 'irregex)
+
+(include "irregex-core.scm")
diff --git a/library.scm b/library.scm
index 9d725885..53036b6e 100644
--- a/library.scm
+++ b/library.scm
@@ -76,7 +76,8 @@
 #define C_a_get_current_seconds(ptr, c, dummy)  C_flonum(ptr, time(NULL))
 #define C_peek_c_string_at(ptr, i)    ((C_char *)(((C_char **)ptr)[ i ]))
 
-static C_word fast_read_line_from_file(C_word str, C_word port, C_word size) {
+static C_word
+fast_read_line_from_file(C_word str, C_word port, C_word size) {
   int n = C_unfix(size);
   int i;
   int c;
@@ -101,7 +102,7 @@ static C_word fast_read_line_from_file(C_word str, C_word port, C_word size) {
 }
 
 static C_word
-fast_read_string_from_file (C_word dest, C_word port, C_word len, C_word pos)
+fast_read_string_from_file(C_word dest, C_word port, C_word len, C_word pos)
 {
   int n = C_unfix (len);
   char * buf = ((char *)C_data_pointer (dest) + C_unfix (pos));
@@ -1731,7 +1732,8 @@ EOF
 
 (define (##sys#check-port x . loc)
   (unless (%port? x)
-    (##sys#signal-hook #:type-error (and (pair? loc) (car loc)) "argument is not a port" x) ) )
+    (##sys#signal-hook
+     #:type-error (and (pair? loc) (car loc)) "argument is not a port" x) ) )
 
 (define (##sys#check-port-mode port mode . loc)
   (unless (eq? mode (##sys#slot port 1))
diff --git a/manual/Supported language b/manual/Supported language
index 3fe014cd..07794a9e 100644
--- a/manual/Supported language	
+++ b/manual/Supported language	
@@ -19,7 +19,8 @@
 * [[Unit ports]] I/O ports
 * [[Unit files]] File and pathname operations
 * [[Unit extras]] Useful utility definitions
-* [[Unit regex]] Regular expressions
+* [[Unit irregex]] Regular expressions
+* [[Unit regex]] Regular expression utilities
 * [[Unit srfi-1]] List Library
 * [[Unit srfi-4]] Homogeneous numeric vectors
 * [[Unit srfi-13]] String library
diff --git a/manual/Unit extras b/manual/Unit extras
index 72081c1f..be49fa4f 100644
--- a/manual/Unit extras	
+++ b/manual/Unit extras	
@@ -196,4 +196,4 @@ false. Returns a string with the accumulated characters.
 ---
 Previous: [[Unit files]]
 
-Next: [[Unit regex]]
+Next: [[Unit irregex]]
diff --git a/manual/Unit irregex b/manual/Unit irregex
new file mode 100644
index 00000000..f8c37036
--- /dev/null
+++ b/manual/Unit irregex	
@@ -0,0 +1,819 @@
+[[tags: manual]]
+[[toc:]]
+
+== Unit irregex
+
+This library unit provides support for regular expressions, using the
+powerful ''irregex'' regular expression engine by Alex Shinn.  It
+supports both POSIX syntax with various (irregular) PCRE extensions,
+as well as SCSH's SRE syntax, with various aliases for commonly used
+patterns.  DFA matching is used when possible, otherwise a
+closure-compiled NFA approach is used.  Matching may be performed over
+standard Scheme strings, or over arbitrarily chunked streams of
+strings.
+
+On systems that support dynamic loading, the {{irregex}} unit can
+be made available in the Chicken interpreter ({{csi}}) by entering
+
+<enscript highlight=scheme>
+(require-extension irregex)
+</enscript>
+
+[[toc:]]
+
+=== Specification
+
+==== Procedures
+
+===== irregex
+===== string->irregex
+===== sre->irregex
+
+<procedure>(irregex <posix-string-or-sre> [<options> ...])</procedure><br>
+<procedure>(string->irregex <posix-string> [<options> ...])</procedure><br>
+<procedure>(sre->irregex <sre> [<options> ...])</procedure><br>
+
+Compiles a regular expression from either a POSIX-style regular
+expression string (with most PCRE extensions) or an SCSH-style SRE.
+There is no {{(rx ...)}} syntax - just use normal Scheme lists, with
+{{quasiquote}} if you like.
+
+Technically a string by itself could be considered a valid (though
+rather silly) SRE, so if you want to just match a literal string you
+should use something like {{(irregex `(: ,str))}}, or use the explicit
+{{(sre->irregex str)}}.
+
+The options are a list of any of the following symbols:
+
+; {{'i}}, {{'case-insensitive}} : match case-insensitively
+; {{'m}}, {{'multi-line}} : treat string as multiple lines (effects {{^}} and {{$}})
+; {{'s}}, {{'single-line}} : treat string as a single line ({{.}} can match newline)
+; {{'utf8}} : utf8-mode (assumes strings are byte-strings)
+; {{'fast}} : try to optimize the regular expression
+; {{'small}} : try to compile a smaller regular expression
+; {{'backtrack}} : enforce a backtracking implementation
+
+The {{'fast}} and {{'small}} options are heuristic guidelines and will
+not necessarily make the compiled expression faster or smaller.
+
+===== string->sre
+===== maybe-string->sre
+
+<procedure>(string->sre <str>)</procedure><br>
+<procedure>(maybe-string->sre <obj>)</procedure><br>
+
+For backwards compatibility, procedures to convert a POSIX string into
+an SRE.
+
+{{maybe-string->sre}} does the same thing, but only if the argument is
+a string, otherwise it assumes {{<obj>}} is an SRE and returns it
+as-is.  This is useful when you want to provide an API that allows
+either a POSIX string or SRE (like {{irregex}} or {{irregex-search}}
+below) - it ensures the result is an SRE.
+
+===== irregex?
+
+<procedure>(irregex? <obj>)</procedure><br>
+
+Returns {{#t}} iff the object is a regular expression.
+
+===== irregex-search
+
+<procedure>(irregex-search <irx> <str> [<start> <end>])</procedure>
+
+Searches for any instances of the pattern {{<irx>}} (a POSIX string, SRE
+sexp, or pre-compiled regular expression) in {{<str>}}, optionally between
+the given range.  If a match is found, returns a match object,
+otherwise returns {{#f}}.
+
+Match objects can be used to query the original range of the string or
+its submatches using the {{irregex-match-*}} procedures below.
+
+Examples:
+
+<enscript highlight=scheme>
+(irregex-search "foobar" "abcFOOBARdef") => #f
+
+(irregex-search "foobar" "abcFOOBARdef" 'i) => #<match>
+
+(irregex-search '(w/nocase "foobar") "abcFOOBARdef") => #<match>
+</enscript>
+
+Note, the actual match result is represented by a vector in the
+default implementation.  Throughout this manual, we'll just write
+{{#<match>}} to show that a successful match was returned when the
+details are not important.
+
+Matching follows the POSIX leftmost, longest semantics, when
+searching.  That is, of all possible matches in the string,
+{{irregex-search}} will return the match at the first position
+(leftmost).  If multiple matches are possible from that same first
+position, the longest match is returned.
+
+===== irregex-match
+
+<procedure>(irregex-match <irx> <str>)</procedure>
+
+Like {{irregex-search}}, but performs an anchored match against the
+beginning and end of the string, without searching.
+
+Examples:
+
+<enscript highlight=scheme>
+(irregex-match '(w/nocase "foobar") "abcFOOBARdef") => #f
+
+(irregex-match '(w/nocase "foobar") "FOOBAR") => #<match>
+</enscript>
+
+===== irregex-match-data?
+
+<procedure>(irregex-match-data? <obj>)</procedure>
+
+Returns {{#t}} iff the object is a successful match result from
+{{irregex-search}} or {{irregex-match}}.
+
+===== irregex-num-submatches
+===== irregex-match-num-submatches
+
+<procedure>(irregex-num-submatches <irx>)</procedure><br>
+<procedure>(irregex-match-num-submatches <match>)</procedure>
+
+Returns the number of numbered submatches that are defined in the
+irregex or match object.
+
+===== irregex-names
+===== irregex-match-names
+
+<procedure>(irregex-names <irx>)</procedure><br>
+<procedure>(irregex-match-names <match>)</procedure>
+
+Returns an association list of named submatches that are defined in
+the irregex or match object.  The {{car}} of each item in this list is
+the name of a submatch, the {{cdr}} of each item is the numerical
+submatch corresponding to this name.  If a named submatch occurs
+multiple times in the irregex, it will also occur multiple times in
+this list.
+
+===== irregex-match-substring
+===== irregex-match-start-index
+===== irregex-match-end-index
+
+<procedure>(irregex-match-substring <match> [<index-or-name>])</procedure><br>
+<procedure>(irregex-match-start-index <match> <index-or-name>)</procedure><br>
+<procedure>(irregex-match-end-index <match> <index-or-name>)</procedure>
+
+Fetches the matched substring (or its start or end offset) at the
+given submatch index, or named submatch.  The entire match is index 0,
+the first 1, etc.  The default is index 0.
+
+===== irregex-match-subchunk
+
+<procedure>(irregex-match-subchunk <match> [<index-or-name>])</procedure>
+
+Generates a chunked data-type for the given match item, of the same
+type as the underlying chunk type (see Chunked String Matching below).
+This is only available if the chunk type specifies the get-subchunk
+API, otherwise an error is raised.
+
+===== irregex-replace
+===== irregex-replace/all
+
+<procedure>(irregex-replace <irx> <str> [<replacements> ...])</procedure><br>
+<procedure>(irregex-replace/all <irx> <str> [<replacements> ...])</procedure>
+
+Matches a pattern in a string, and replaces it with a (possibly empty)
+list of substitutions.  Each {{<replacement>}} can be either a string
+literal, a numeric index, a symbol (as a named submatch), or a
+procedure which takes one argument (the match object) and returns a
+string.
+
+Examples:
+
+<enscript highlight=scheme>
+(irregex-replace "[aeiou]" "hello world" "*") => "h*llo world"
+
+(irregex-replace/all "[aeiou]" "hello world" "*") => "h*ll* w*rld"
+</enscript>
+
+===== irregex-split
+===== irregex-extract
+
+<procedure>(irregex-split <irx> <str> [<start> <end>])</procedure><br>
+<procedure>(irregex-extract <irx> <str> [<start> <end>])</procedure>
+
+{{irregex-split}} splits the string {{<str>}} into substrings divided
+by the pattern in {{<irx>}}.  {{irregex-extract}} does the opposite,
+returning a list of each instance of the pattern matched disregarding
+the substrings in between.
+
+===== irregex-fold
+
+<procedure>(irregex-fold <irx> <kons> <knil> <str> [<finish> <start> <end>])</procedure>
+
+This performs a fold operation over every non-overlapping place
+{{<irx>}} occurs in the string {{str}}.
+
+The {{<kons>}} procedure takes the following signature:
+
+<enscript highlight=scheme>
+(<kons> <from-index> <match> <seed>)
+</enscript>
+
+where {{<from-index>}} is the index from where we started searching
+(initially {{<start>}} and thereafter the end index of the last
+match), {{<match>}} is the resulting match-data object, and {{<seed>}}
+is the accumulated fold result starting with {{<knil>}}.
+
+The rationale for providing the {{<from-index>}} (which is not
+provided in the SCSH {{regexp-fold}} utility), is because this
+information is useful (e.g. for extracting the unmatched portion of
+the string before the current match, as needed in
+{{irregex-replace}}), and not otherwise directly accessible.
+
+The optional {{<finish>}} takes two arguments:
+
+<enscript highlight=scheme>
+(<finish> <from-index> <seed>)
+</enscript>
+
+which simiarly allows you to pick up the unmatched tail of the string,
+and defaults to just returning the {{<seed>}}.
+
+{{<start>}} and {{<end>}} are numeric indices letting you specify the
+boundaries of the string on which you want to fold.
+
+To extract all instances of a match out of a string, you can use
+
+<enscript highlight=scheme>
+(map irregex-match-substring
+     (irregex-fold <irx>
+                   (lambda (i m s) (cons m s))
+		   '()
+		   <str>
+		   (lambda (i s) (reverse s))))
+</enscript>
+
+==== Extended SRE Syntax
+
+Irregex provides the first native implementation of SREs (Scheme
+Regular Expressions), and includes many extensions necessary both for
+minimal POSIX compatibility, as well as for modern extensions found in
+libraries such as PCRE.
+
+The following table summarizes the SRE syntax, with detailed
+explanations following.
+
+  ;; basic patterns
+  <string>                          ; literal string
+  (seq <sre> ...)                   ; sequence
+  (: <sre> ...)
+  (or <sre> ...)                    ; alternation
+  
+  ;; optional/multiple patterns
+  (? <sre> ...)                     ; 0 or 1 matches
+  (* <sre> ...)                     ; 0 or more matches
+  (+ <sre> ...)                     ; 1 or more matches
+  (= <n> <sre> ...)                 ; exactly <n> matches
+  (>= <n> <sre> ...)                ; <n> or more matches
+  (** <from> <to> <sre> ...)        ; <n> to <m> matches
+  (?? <sre> ...)                    ; non-greedy (non-greedy) pattern: (0 or 1)
+  (*? <sre> ...)                    ; non-greedy kleene star
+  (**? <from> <to> <sre> ...)       ; non-greedy range
+  
+  ;; submatch patterns
+  (submatch <sre> ...)              ; numbered submatch
+  ($ <sre> ...)
+  (submatch-named <name> <sre> ...) ; named submatch
+  (=> <name> <sre> ...)
+  (backref <n-or-name>)             ; match a previous submatch
+  
+  ;; toggling case-sensitivity
+  (w/case <sre> ...)                ; enclosed <sre>s are case-sensitive
+  (w/nocase <sre> ...)              ; enclosed <sre>s are case-insensitive
+  
+  ;; character sets
+  <char>                            ; singleton char set
+  (<string>)                        ; set of chars
+  (or <cset-sre> ...)               ; set union
+  (~ <cset-sre> ...)                ; set complement (i.e. [^...])
+  (- <cset-sre> ...)                ; set difference
+  (& <cset-sre> ...)                ; set intersection
+  (/ <range-spec> ...)              ; pairs of chars as ranges
+  
+  ;; named character sets
+  any
+  nonl
+  ascii
+  lower-case     lower
+  upper-case     upper
+  alphabetic     alpha
+  numeric        num
+  alphanumeric   alphanum  alnum
+  punctuation    punct
+  graphic        graph
+  whitespace     white     space
+  printing       print
+  control        cntrl
+  hex-digit      xdigit
+  
+  ;; assertions and conditionals
+  bos eos                           ; beginning/end of string
+  bol eol                           ; beginning/end of line
+  bow eow                           ; beginning/end of word
+  nwb                               ; non-word-boundary
+  (look-ahead <sre> ...)            ; zero-width look-ahead assertion
+  (look-behind <sre> ...)           ; zero-width look-behind assertion
+  (neg-look-ahead <sre> ...)        ; zero-width negative look-ahead assertion
+  (neg-look-behind <sre> ...)       ; zero-width negative look-behind assertion
+  (atomic <sre> ...)                ; for (?>...) independent patterns
+  (if <test> <pass> [<fail>])       ; conditional patterns
+  commit                            ; don't backtrack beyond this (i.e. cut)
+  
+  ;; backwards compatibility
+  (posix-string <string>)           ; embed a POSIX string literal
+
+===== Basic SRE Patterns
+
+The simplest SRE is a literal string, which matches that string
+exactly.
+
+<enscript highlight=scheme>
+(irregex-search "needle" "hayneedlehay") => #<match>
+</enscipt>
+
+By default the match is case-sensitive, though you can control this
+either with the compiler flags or local overrides:
+
+<enscript highlight=scheme>
+(irregex-search "needle" "haynEEdlehay") => #f
+
+(irregex-search (irregex "needle" 'i) "haynEEdlehay") => #<match>
+
+(irregex-search '(w/nocase "needle") "haynEEdlehay") => #<match>
+</enscript>
+
+You can use {{w/case}} to switch back to case-sensitivity inside a
+{{w/nocase}} or when the SRE was compiled with {{'i}}:
+
+<enscript highlight=scheme>
+(irregex-search '(w/nocase "SMALL" (w/case "BIG")) "smallBIGsmall") => #<match>
+
+(irregex-search '(w/nocase "small" (w/case "big")) "smallBIGsmall") => #f
+</enscript>
+
+Of course, literal strings by themselves aren't very interesting
+regular expressions, so we want to be able to compose them.  The most
+basic way to do this is with the {{seq}} operator (or its abbreviation
+{{:}}), which matches one or more patterns consecutively:
+
+<enscript highlight=scheme>
+(irregex-search '(: "one" space "two" space "three") "one two three") => #<match>
+</enscript>
+
+As you may have noticed above, the {{w/case}} and {{w/nocase}}
+operators allowed multiple SREs in a sequence - other operators that
+take any number of arguments (e.g. the repetition operators below)
+allow such implicit sequences.
+
+To match any one of a set of patterns use the {{or}} alternation
+operator:
+
+<enscript highlight=scheme>
+(irregex-search '(or "eeney" "meeney" "miney") "meeney") => #<match>
+
+(irregex-search '(or "eeney" "meeney" "miney") "moe") => #f
+</enscript>
+
+===== SRE Repetition Patterns
+
+There are also several ways to control the number of times a pattern
+is matched.  The simplest of these is {{?}} which just optionally
+matches the pattern:
+
+<enscript highlight=scheme>
+(irregex-search '(: "match" (? "es") "!") "matches!") => #<match>
+
+(irregex-search '(: "match" (? "es") "!") "match!") => #<match>
+
+(irregex-search '(: "match" (? "es") "!") "matche!") => #<match>
+</enscript>
+
+To optionally match any number of times, use {{*}}, the Kleene star:
+
+<enscript highlight=scheme>
+(irregex-search '(: "<" (* (~ #\>)) ">") "<html>") => #<match>
+
+(irregex-search '(: "<" (* (~ #\>)) ">") "<>") => #<match>
+
+(irregex-search '(: "<" (* (~ #\>)) ">") "<html") => #f
+</enscript>
+
+Often you want to match any number of times, but at least one time is
+required, and for that you use {{+}}:
+
+<enscript highlight=scheme>
+(irregex-search '(: "<" (+ (~ #\>)) ">") "<html>") => #<match>
+
+(irregex-search '(: "<" (+ (~ #\>)) ">") "<a>") => #<match>
+
+(irregex-search '(: "<" (+ (~ #\>)) ">") "<>") => #f
+</enscript>
+
+More generally, to match at least a given number of times, use {{>=}}:
+
+<enscript highlight=scheme>
+(irregex-search '(: "<" (>= 3 (~ #\>)) ">") "<table>") => #<match>
+
+(irregex-search '(: "<" (>= 3 (~ #\>)) ">") "<pre>") => #<match>
+
+(irregex-search '(: "<" (>= 3 (~ #\>)) ">") "<tr>") => #f
+</enscript>
+
+To match a specific number of times exactly, use {{=}}:
+
+<enscript highlight=scheme>
+(irregex-search '(: "<" (= 4 (~ #\>)) ">") "<html>") => #<match>
+
+(irregex-search '(: "<" (= 4 (~ #\>)) ">") "<table>") => #f
+</enscript>
+
+And finally, the most general form is {{**}} which specifies a range
+of times to match.  All of the earlier forms are special cases of this.
+
+<enscript highlight=scheme>
+(irregex-search '(: (= 3 (** 1 3 numeric) ".") (** 1 3 numeric)) "192.168.1.10") => #<match>
+
+(irregex-search '(: (= 3 (** 1 3 numeric) ".") (** 1 3 numeric)) "192.0168.1.10") => #f
+</enscript>
+
+There are also so-called "non-greedy" variants of these repetition
+operators, by convention suffixed with an additional {{?}}.  Since the
+normal repetition patterns can match any of the allotted repetition
+range, these operators will match a string if and only if the normal
+versions matched.  However, when the endpoints of which submatch
+matched where are taken into account (specifically, all matches when
+using irregex-search since the endpoints of the match itself matter),
+the use of a non-greedy repetition can change the result.
+
+So, whereas {{?}} can be thought to mean "match or don't match,"
+{{??}} means "don't match or match."  {{*}} typically consumes as much
+as possible, but {{*?}} tries first to match zero times, and only
+consumes one at a time if that fails.  If you have a greedy operator
+followed by a non-greedy operator in the same pattern, they can
+produce surprisins results as they compete to make the match longer or
+shorter.  If this seems confusing, that's because it is.  Non-greedy
+repetitions are defined only in terms of the specific backtracking
+algorithm used to implement them, which for compatibility purposes
+always means the Perl algorithm.  Thus, when using these patterns you
+force IrRegex to use a backtracking engine, and can't rely on
+efficient execution.
+
+===== SRE Character Sets
+
+Perhaps more common than matching specific strings is matching any of
+a set of characters.  You can use the {{or}} alternation pattern on a
+list of single-character strings to simulate a character set, but this
+is too clumsy for everyday use so SRE syntax allows a number of
+shortcuts.
+
+A single character matches that character literally, a trivial
+character class.  More conveniently, a list holding a single element
+which is a string refers to the character set composed of every
+character in the string.
+
+<enscript highlight=scheme>
+(irregex-match '(* #\-) "---") => #<match>
+
+(irregex-match '(* #\-) "-_-") => #f
+
+(irregex-match '(* ("aeiou")) "oui") => #<match>
+
+(irregex-match '(* ("aeiou")) "ouais") => #f
+</enscript>
+
+Ranges are introduced with the \q{/} operator.  Any strings or
+characters in the \q{/} are flattened and then taken in pairs to
+represent the start and end points, inclusive, of character ranges.
+
+<enscript highlight=scheme>
+(irregex-match '(* (/ "AZ09")) "R2D2") => #<match>
+
+(irregex-match '(* (/ "AZ09")) "C-3PO") => #f
+</enscript>
+
+In addition, a number of set algebra operations are provided.  \q{or},
+of course, has the same meaning, but when all the options are
+character sets it can be thought of as the set union operator.  This
+is further extended by the \q{&} set intersection, \q{-} set
+difference, and \q{~} set complement operators.
+
+<enscript highlight=scheme>
+(irregex-match '(* (& (/ "az") (~ ("aeiou")))) "xyzzy") => #<match>
+
+(irregex-match '(* (& (/ "az") (~ ("aeiou")))) "vowels") => #f
+
+(irregex-match '(* (- (/ "az") ("aeiou"))) "xyzzy") => #<match>
+
+(irregex-match '(* (- (/ "az") ("aeiou"))) "vowels") => #f
+</enscript>
+
+===== SRE Assertion Patterns
+
+There are a number of times it can be useful to assert something about
+the area around a pattern without explicitly making it part of the
+pattern.  The most common cases are specifically anchoring some
+pattern to the beginning or end of a word or line or even the whole
+string.  For example, to match on the end of a word:
+
+<enscript highlight=scheme>
+(irregex-match '(: "foo" eow) "foo") => #<match>
+
+(irregex-match '(: "foo" eow) "foo!") => #<match>
+
+(irregex-match '(: "foo" eow) "foof") => #f
+</enscript>
+
+The {{bow}}, {{bol}}, {{eol}}, {{bos}} and {{eos}} work similarly.
+{{nwb}} asserts that you are not in a word-boundary - if replaced for
+{{eow}} in the above examples it would reverse all the results.
+
+There is no {{wb}}, since you tend to know from context whether it
+would be the beginning or end of a word, but if you need it you can
+always use {{(or bow eow)}}.
+
+Somewhat more generally, Perl introduced positive and negative
+look-ahead and look-behind patterns.  Perl look-behind patterns are
+limited to a fixed length, however the IrRegex versions have no such
+limit.
+
+<enscript highlight=scheme>
+(irregex-match '(: "regular" (look-ahead " expression"))
+               "regular expression")
+ => #<match>
+</enscript>
+
+The most general case, of course, would be an \q{and} pattern to
+complement the \q{or} pattern - all the patterns must match or the
+whole pattern fails.  This may be provided in a future release,
+although it (and look-ahead and look-behind assertions) are unlikely
+to be compiled efficiently.
+
+===== SRE Utility Patterns
+
+The following utility regular expressions are also provided for common
+patterns that people are eternally reinventing.  They are not
+necessarily the official patterns matching the RFC definitions of the
+given data, because of the way that such patterns tend to be used.
+There are three general usages for regexps:
+
+; searching : search for a pattern matching a desired object in a larger text
+
+; validation : determine whether an entire string matches a pattern
+
+; extraction : given a string already known to be valid, extract certain fields from it as submatches
+
+In some cases, but not always, these will overlap.  When they are
+different, {{irregex-search}} will naturally always want the searching
+version, so IrRegex provides that version.
+
+As an example where these might be different, consider a URL.  If you
+want to match all the URLs in some arbitrary text, you probably want
+to exclude a period or comma at the tail end of a URL, since it's more
+likely being used as punctuation rather than part of the URL, despite
+the fact that it would be valid URL syntax.
+
+Another problem with the RFC definitions is the standard itself may
+have become irrelevant.  For example, the pattern IrRegex provides for
+email addresses doesn't match quoted local parts (e.g.
+{{"first last"@domain.com}}) because these are increasingly rare, and
+unsupported by enough software that it's better to discourage their use.
+Conversely, technically consecutive periods
+(e.g. {{first..last@domain.com}}) are not allowed in email addresses, but
+most email software does allow this, and in fact such addresses are
+quite common in Japan.
+
+The current patterns provided are:
+
+  newline                        ; general newline pattern (crlf, cr, lf)
+  integer                        ; an integer
+  real                           ; a real number (including scientific)
+  string                         ; a "quoted" string
+  symbol                         ; an R5RS Scheme symbol
+  ipv4-address                   ; a numeric decimal ipv4 address
+  ipv6-address                   ; a numeric hexadecimal ipv6 address
+  domain                         ; a domain name
+  email                          ; an email address
+  http-url                       ; a URL beginning with https?://
+
+Because of these issues the exact definitions of these patterns are
+subject to be changed, but will be documented clearly when they are
+finalized.  More common patterns are also planned, but as what you
+want increases in complexity it's probably better to use a real
+parser.
+
+==== Supported PCRE Syntax
+
+Since the PCRE syntax is so overwhelming complex, it's easier to just
+list what we *don't* support for now.  Refer to the
+[[http://pcre.org/pcre.txt|PCRE documentation]] for details.  You
+should be using the SRE syntax anyway!
+
+Unicode character classes ({{\P}}) are not supported, but will be
+in an upcoming release.  {{\C}} named characters are not supported.
+
+Callbacks, subroutine patterns and recursive patterns are not
+supported.  ({{*FOO}}) patterns are not supported and may never be.
+
+{{\G}} and {{\K}} are not supported.
+
+Octal character escapes are not supported because they are ambiguous
+with back-references - just use hex character escapes.
+
+Other than that everything should work, including named submatches,
+zero-width assertions, conditional patterns, etc.
+
+In addition, {{\<}} and {{\>}} act as beginning-of-word and end-of-word
+marks, respectively, as in Emacs regular expressions.
+
+Also, two escapes are provided to embed SRE patterns inside PCRE
+strings, {{"\'<sre>"}} and {{"(*'<sre>)"}}.  For example, to match a
+comma-delimited list of integers you could use
+
+<enscript highlight=scheme>
+"\\'integer(,\\'integer)*"
+</enscript>
+
+and to match a URL in angle brackets you could use
+
+<enscript highlight=scheme>
+"<('*http-url)>"
+</enscript>
+
+Note in the second example the enclosing {{"('*...)"}} syntax is needed
+because the Scheme reader would consider the closing {{">"}} as part of
+the SRE symbol.
+
+The following chart gives a quick reference from PCRE form to the SRE
+equivalent:
+
+  ;; basic syntax
+  "^"                     ;; bos (or eos inside (?m: ...))
+  "$"                     ;; eos (or eos inside (?m: ...))
+  "."                     ;; nonl
+  "a?"                    ;; (? a)
+  "a*"                    ;; (* a)
+  "a+"                    ;; (+ a)
+  "a??"                   ;; (?? a)
+  "a*?"                   ;; (*? a)
+  "a+?"                   ;; (+? a)
+  "a{n,m}"                ;; (** n m a)
+
+  ;; grouping
+  "(...)"                 ;; (submatch ...)
+  "(?:...)"               ;; (: ...)
+  "(?i:...)"              ;; (w/nocase ...)
+  "(?-i:...)"             ;; (w/case ...)
+  "(?<name>...)"          ;; (=> <name>...)
+
+  ;; character classes
+  "[aeiou]"               ;; ("aeiou")
+  "[^aeiou]"              ;; (~ "aeiou")
+  "[a-z]"                 ;; (/ "az") or (/ "a" "z")
+  "[[:alpha:]]"           ;; alpha
+
+  ;; assertions
+  "(?=...)"               ;; (look-ahead ...)
+  "(?!...)"               ;; (neg-look-ahead ...)
+  "(?<=...)"              ;; (look-behind ...)
+  "(?<!...)"              ;; (neg-look-behind ...)
+  "(?(test)pass|fail)"    ;; (if test pass fail)
+  "(*COMMIT)"             ;; commit
+
+==== Chunked String Matching
+
+It's often desirable to perform regular expression matching over
+sequences of characters not represented as a single string.  The most
+obvious example is a text-buffer data structure, but you may also want
+to match over lists or trees of strings (i.e. ropes), over only
+certain ranges within a string, over an input port, etc.  With
+existing regular expression libraries, the only way to accomplish this
+is by converting the abstract sequence into a freshly allocated
+string.  This can be expensive, or even impossible if the object is a
+text-buffer opened onto a 500MB file.
+
+IrRegex provides a chunked string API specifically for this purpose.
+You define a chunking API with {{make-irregex-chunker}}:
+
+===== make-irregex-chunker
+
+<procedure>(make-irregex-chunker <get-next> <get-string> [<get-start> <get-end> <get-substring> <get-subchunk>])</procedure>
+
+where 
+
+{{(<get-next> chunk) => }} returns the next chunk, or {{#f}} if there are no more chunks
+
+{{(<get-string> chunk) => }} a string source for the chunk
+
+{{(<get-start> chunk) => }} the start index of the result of {{<get-string>}} (defaults to always 0)
+
+{{(<get-end> chunk) => }} the end (exclusive) of the string (defaults to {{string-length}} of the source string)
+
+{{(<get-substring> cnk1 i cnk2 j) => }} a substring for the range between the chunk {{cnk1}} starting at index {{i}} and ending at {{cnk2}} at index {{j}}
+
+{{(<get-subchunk> cnk1 i cnk2 j) => }} as above but returns a new chunked data type instead of a string (optional)
+
+There are two important constraints on the {{<get-next>}} procedure.
+It must return an {{eq?}} identical object when called multiple times
+on the same chunk, and it must not return a chunk with an empty string
+(start == end).  This second constraint is for performance reasons -
+we push the work of possibly filtering empty chunks to the chunker
+since there are many chunk types for which empty strings aren't
+possible, and this work is thus not needed.  Note that the initial
+chunk passed to match on is allowed to be empty.
+
+{{<get-substring>}} is provided for possible performance improvements
+- without it a default is used.  {{<get-subchunk>}} is optional -
+without it you may not use {{irregex-match-subchunk}} described above.
+
+You can then match chunks of these types with the following
+procedures:
+
+===== irregex-search/chunked
+===== irregex-match/chunked
+
+<procedure>(irregex-search/chunked <irx> <chunker> <chunk> [<start>])</procedure><br>
+<procedure>(irregex-match/chunked <irx> <chunker> <chunk> [<start>])</procedure>
+
+These return normal match-data objects.
+
+Example:
+
+To match against a simple, flat list of strings use:
+
+<enscript highlight=scheme>
+  (define (rope->string rope1 start rope2 end)
+    (if (eq? rope1 rope2)
+        (substring (car rope1) start end)
+        (let loop ((rope (cdr rope1))
+                   (res (list (substring (car rope1) start))))
+           (if (eq? rope rope2)
+               (string-concatenate-reverse      ; from SRFI-13
+                (cons (substring (car rope) 0 end) res))
+               (loop (cdr rope) (cons (car rope) res))))))
+
+  (define rope-chunker
+    (make-irregex-chunker (lambda (x) (and (pair? (cdr x)) (cdr x)))
+                          car
+                          (lambda (x) 0)
+                          (lambda (x) (string-length (car x)))
+                          rope->string))
+
+  (irregex-search/chunked <pat> rope-chunker <list-of-strings>)
+</enscript>
+
+Here we are just using the default start, end and substring behaviors,
+so the above chunker could simply be defined as:
+
+<enscript highlight=scheme>
+  (define rope-chunker
+    (make-irregex-chunker (lambda (x) (and (pair? (cdr x)) (cdr x))) car))
+</enscript>
+
+===== irregex-fold/chunked
+
+<procedure>(irregex-fold/chunked <irx> <kons> <knil> <chunker> <chunk> [<finish> [<start-index>]])</procedure>
+
+Chunked version of {{irregex-fold}}.
+
+==== Utilities
+
+The following procedures are also available.
+
+===== irregex-quote
+
+<procedure>(irregex-quote <str>)</procedure>
+
+Returns a new string with any special regular expression characters
+escaped, to match the original string literally in POSIX regular
+expressions.
+
+===== irregex-opt
+
+<procedure>(irregex-opt <list-of-strings>)</procedure>
+
+Returns an optimized SRE matching any of the literal strings
+in the list, like Emacs' \q{regexp-opt}.  Note this optimization
+doesn't help when irregex is able to build a DFA.
+
+===== sre->string
+
+<procedure>(sre->string <sre>)</procedure>
+
+Convert an SRE to a POSIX-style regular expression string, if
+possible.
+
+
+---
+Previous: [[Unit extras]]
+
+Next: [[Unit regex]]
diff --git a/manual/Unit regex b/manual/Unit regex
index 2d0c249e..bd6eb479 100644
--- a/manual/Unit regex	
+++ b/manual/Unit regex	
@@ -3,21 +3,15 @@
 
 == Unit regex
 
-This library unit provides support for regular expressions. The regular 
-expression package used is {{irregex}}
-written by Alex Shinn. Irregex supports most Perl-extensions and is
-written completely in Scheme.
 
-This library unit exposes two APIs: the standard Chicken API described below, and the
-original irregex API.  You may use either API or both:
+This library unit provides some high-level operations for regular
+expression and operations that are kept for backward compatibility
+to older versions of CHICKEN.
 
- (require-library regex)   ; required for either API, or both
- (import regex)            ; import the Chicken regex API
- (import irregex)          ; import the original irregex API
+This unit uses the {{irregex}} unit internally. It is recommended
+to use the {{irregex}} API where possible, since it provides a
+more featureful interface. 
 
-Regular expressions may be either POSIX-style strings (with most PCRE
-extensions) or an SCSH-style SRE. There is no {{(rx ...)}} syntax -
-just use normal Scheme lists, with quasiquote if you like.
 
 === grep
 
@@ -196,266 +190,7 @@ into a regular expression.
 =>  "\\^\\[0-9\\]\\+:.\n.\\*\\$"
 </enscript>
 
-=== Extended SRE Syntax
-
-The following table summarizes the SRE syntax, with detailed explanations following.
-
-  ;; basic patterns
-  <string>                          ; literal string
-  (seq <sre> ...)                   ; sequence
-  (: <sre> ...)
-  (or <sre> ...)                    ; alternation
-  
-  ;; optional/multiple patterns
-  (? <sre> ...)                     ; 0 or 1 matches
-  (* <sre> ...)                     ; 0 or more matches
-  (+ <sre> ...)                     ; 1 or more matches
-  (= <n> <sre> ...)                 ; exactly <n> matches
-  (>= <n> <sre> ...)                ; <n> or more matches
-  (** <from> <to> <sre> ...)        ; <n> to <m> matches
-  (?? <sre> ...)                    ; non-greedy (non-greedy) pattern: (0 or 1)
-  (*? <sre> ...)                    ; non-greedy kleene star
-  (**? <from> <to> <sre> ...)       ; non-greedy range
-  
-  ;; submatch patterns
-  (submatch <sre> ...)              ; numbered submatch
-  (submatch-named <name> <sre> ...) ; named submatch
-  (=> <name> <sre> ...)
-  (backref <n-or-name>)             ; match a previous submatch
-  
-  ;; toggling case-sensitivity
-  (w/case <sre> ...)                ; enclosed <sre>s are case-sensitive
-  (w/nocase <sre> ...)              ; enclosed <sre>s are case-insensitive
-  
-  ;; character sets
-  <char>                            ; singleton char set
-  (<string>)                        ; set of chars
-  (or <cset-sre> ...)               ; set union
-  (~ <cset-sre> ...)                ; set complement (i.e. [^...])
-  (- <cset-sre> ...)                ; set difference
-  (& <cset-sre> ...)                ; set intersection
-  (/ <range-spec> ...)              ; pairs of chars as ranges
-  
-  ;; named character sets
-  any
-  nonl
-  ascii
-  lower-case     lower
-  upper-case     upper
-  alphabetic     alpha
-  numeric        num
-  alphanumeric   alphanum  alnum
-  punctuation    punct
-  graphic        graph
-  whitespace     white     space
-  printing       print
-  control        cntrl
-  hex-digit      xdigit
-  
-  ;; assertions and conditionals
-  bos eos                           ; beginning/end of string
-  bol eol                           ; beginning/end of line
-  bow eow                           ; beginning/end of word
-  nwb                               ; non-word-boundary
-  (look-ahead <sre> ...)            ; zero-width look-ahead assertion
-  (look-behind <sre> ...)           ; zero-width look-behind assertion
-  (neg-look-ahead <sre> ...)        ; zero-width negative look-ahead assertion
-  (neg-look-behind <sre> ...)       ; zero-width negative look-behind assertion
-  (atomic <sre> ...)                ; for (?>...) independent patterns
-  (if <test> <pass> [<fail>])       ; conditional patterns
-  commit                            ; don't backtrack beyond this (i.e. cut)
-  
-  ;; backwards compatibility
-  (posix-string <string>)           ; embed a POSIX string literal
-
-====  Basic SRE Patterns
-
-The simplest SRE is a literal string, which matches that string exactly.
-
-  (string-search "needle" "hayneedlehay") => <match>
-
-By default the match is case-sensitive, though you can control this either with the compiler flags or local overrides:
-
-  (string-search "needle" "haynEEdlehay") => #f
-  
-  (string-search (irregex "needle" 'i) "haynEEdlehay") => <match>
-  
-  (string-search '(w/nocase "needle") "haynEEdlehay") => <match>
-
-You can use {{w/case}} to switch back to case-sensitivity inside a {{w/nocase}}:
-
-  (string-search '(w/nocase "SMALL" (w/case "BIG")) "smallBIGsmall") => <match>
-  
-  (string-search '(w/nocase "small" (w/case "big")) "smallBIGsmall") => #f
-
-Of course, literal strings by themselves aren't very interesting
-regular expressions, so we want to be able to compose them. The most
-basic way to do this is with the {{seq}} operator (or its abbreviation {{:}}),
-which matches one or more patterns consecutively:
-
-  (string-search '(: "one" space "two" space "three") "one two three") => <match>
-
-As you may have noticed above, the {{w/case}} and {{w/nocase}} operators
-allowed multiple SREs in a sequence - other operators that take any
-number of arguments (e.g. the repetition operators below) allow such
-implicit sequences.
-
-To match any one of a set of patterns use the or alternation operator:
-
-  (string-search '(or "eeney" "meeney" "miney") "meeney") => <match>
-
-  (string-search '(or "eeney" "meeney" "miney") "moe") => #f
-
-====  SRE Repetition Patterns
-
-There are also several ways to control the number of times a pattern
-is matched. The simplest of these is {{?}} which just optionally matches
-the pattern:
-
-  (string-search '(: "match" (? "es") "!") "matches!") => <match>
-  
-  (string-search '(: "match" (? "es") "!") "match!") => <match>
-  
-  (string-search '(: "match" (? "es") "!") "matche!") => #f
-
-To optionally match any number of times, use {{*}}, the Kleene star:
-
-  (string-search '(: "<" (* (~ #\>)) ">") "<html>") => <match>
-  
-  (string-search '(: "<" (* (~ #\>)) ">") "<>") => <match>
-  
-  (string-search '(: "<" (* (~ #\>)) ">") "<html") => #f
-
-Often you want to match any number of times, but at least one time is required, and for that you use {{+}}:
-
-  (string-search '(: "<" (+ (~ #\>)) ">") "<html>") => <match>
-  
-  (string-search '(: "<" (+ (~ #\>)) ">") "<a>") => <match>
-  
-  (string-search '(: "<" (+ (~ #\>)) ">") "<>") => #f
-
-More generally, to match at least a given number of times, use {{>=}}:
-
-  (string-search '(: "<" (>= 3 (~ #\>)) ">") "<table>") => <match>
-
-  (string-search '(: "<" (>= 3 (~ #\>)) ">") "<pre>") => <match>
-
-  (string-search '(: "<" (>= 3 (~ #\>)) ">") "<tr>") => #f
-
-To match a specific number of times exactly, use {=}:
-
-  (string-search '(: "<" (= 4 (~ #\>)) ">") "<html>") => <match>
-  
-  (string-search '(: "<" (= 4 (~ #\>)) ">") "<table>") => #f
-
-And finally, the most general form is {{**}} which specifies a range
-of times to match. All of the earlier forms are special cases of this.
-
-  (string-search '(: (= 3 (** 1 3 numeric) ".") (** 1 3 numeric)) "192.168.1.10") => <match>
-
-  (string-search '(: (= 3 (** 1 3 numeric) ".") (** 1 3 numeric)) "192.0168.1.10") => #f
-
-There are also so-called "non-greedy" variants of these repetition
-operators, by convention suffixed with an additional {{?}}. Since the
-normal repetition patterns can match any of the allotted repetition
-range, these operators will match a string if and only if the normal
-versions matched. However, when the endpoints of which submatch
-matched where are taken into account (specifically, all matches when
-using string-search since the endpoints of the match itself matter),
-the use of a non-greedy repetition can change the result.
-
-So, whereas {{?}} can be thought to mean "match or don't match," {{??}} means
-"don't match or match." {{*}} typically consumes as much as possible, but
-{{*?}} tries first to match zero times, and only consumes one at a time if
-that fails. If you have a greedy operator followed by a non-greedy
-operator in the same pattern, they can produce surprisins results as
-they compete to make the match longer or shorter. If this seems
-confusing, that's because it is. Non-greedy repetitions are defined
-only in terms of the specific backtracking algorithm used to implement
-them, which for compatibility purposes always means the Perl
-algorithm. Thus, when using these patterns you force IrRegex to use a
-backtracking engine, and can't rely on efficient execution.
-
-====  SRE Character Sets
-
-Perhaps more common than matching specific strings is matching any of
-a set of characters. You can use the or alternation pattern on a list
-of single-character strings to simulate a character set, but this is
-too clumsy for everyday use so SRE syntax allows a number of
-shortcuts.
-
-A single character matches that character literally, a trivial
-character class. More conveniently, a list holding a single element
-which is a string refers to the character set composed of every
-character in the string.
-
-  (string-match '(* #\-) "---") => <match>
-  
-  (string-match '(* #\-) "-_-") => #f
-  
-  (string-match '(* ("aeiou")) "oui") => <match>
-  
-  (string-match '(* ("aeiou")) "ouais") => #f
-
-Ranges are introduced with the {{/}} operator. Any strings or characters
-in the {{/}} are flattened and then taken in pairs to represent the start
-and end points, inclusive, of character ranges.
-
-  (string-match '(* (/ "AZ09")) "R2D2") => <match>
-  
-  (string-match '(* (/ "AZ09")) "C-3PO") => #f
-
-In addition, a number of set algebra operations are provided. or, of
-course, has the same meaning, but when all the options are character
-sets it can be thought of as the set union operator. This is further
-extended by the {{&}} set intersection, {{-}} set difference, and {{~}} set
-complement operators.
-
-  (string-match '(* (& (/ "az") (~ ("aeiou")))) "xyzzy") => <match>
-  
-  (string-match '(* (& (/ "az") (~ ("aeiou")))) "vowels") => #f
-
-  (string-match '(* (- (/ "az") ("aeiou"))) "xyzzy") => <match>
-  
-  (string-match '(* (- (/ "az") ("aeiou"))) "vowels") => #f
-
-====  SRE Assertion Patterns
-
-There are a number of times it can be useful to assert something about
-the area around a pattern without explicitly making it part of the
-pattern. The most common cases are specifically anchoring some pattern
-to the beginning or end of a word or line or even the whole
-string. For example, to match on the end of a word:
-
-  (string-match '(: "foo" eow) "foo") => <match>
-  
-  (string-match '(: "foo" eow) "foo!") => <match>
-  
-  (string-match '(: "foo" eow) "foof") => #f
-
-The {{bow}}, {{bol}}, {{eol}}, {{bos}} and {{eos}} work similarly. {{nwb}} asserts that you
-are not in a word-boundary - if replaced for {{eow}} in the above examples
-it would reverse all the results.
-
-There is no {{wb}}, since you tend to know from context whether it
-would be the beginning or end of a word, but if you need it you can
-always use (or bow eow).
-
-Somewhat more generally, Perl introduced positive and negative
-look-ahead and look-behind patterns. Perl look-behind patterns are
-limited to a fixed length, however the IrRegex versions have no such
-limit.
-
-  (string-match '(: "regular" (look-ahead " expression")) "regular expression") => <match>
-
-The most general case, of course, would be an and pattern to
-complement the or pattern - all the patterns must match or the whole
-pattern fails. This may be provided in a future release, although it
-(and look-ahead and look-behind assertions) are unlikely to be
-compiled efficiently.
-
-
 ---
-Previous: [[Unit extras]]
+Previous: [[Unit irregex]]
 
 Next: [[Unit srfi-1]]
diff --git a/posix-common.scm b/posix-common.scm
index 55f9f488..724d3283 100644
--- a/posix-common.scm
+++ b/posix-common.scm
@@ -245,12 +245,12 @@ EOF
             '()
             (let ((path (car paths)))
               (let-values (((dir fil ext) (decompose-pathname path)))
-                (let* ((patt (glob->regexp (make-pathname #f (or fil "*") ext)))
-                       (rx (regexp patt)))
+                (let ((rx (glob->regexp (make-pathname #f (or fil "*") ext))))
                   (let loop ((fns (directory (or dir ".") #t)))
                     (cond ((null? fns) (conc-loop (cdr paths)))
                           ((string-match rx (car fns))
-                           => (lambda (m) (cons (make-pathname dir (car m)) (loop (cdr fns)))) )
+                           => (lambda (m)
+				(cons (make-pathname dir (car m)) (loop (cdr fns)))) )
                           (else (loop (cdr fns))) ) ) ) ) ) ) ) ) ) )
 
 
diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm
index 4a62befb..8118ad5f 100644
--- a/tests/test-irregex.scm
+++ b/tests/test-irregex.scm
@@ -43,19 +43,21 @@
                  (lp)))))))))))
 
 (define (test-re matcher line)
-  (match (string-split line "\t" #t)
-    ((pattern input result subst output)
-     (let ((name (sprintf "~A  ~A  ~A  ~A" pattern input result subst)))
-       (cond
-        ((equal? "c" result)
-         (test-error name (matcher pattern input)))
-        ((equal? "n" result)
-         (test-assert name (not (matcher pattern input))))
-        (else
-         (test name output
-           (subst-matches (matcher pattern input) subst))))))
-    (else
-     (warning "invalid regex test line" line))))
+  (let ((splt (string-split line "\t" #t)))
+    (if (list? splt)
+	(apply
+	 (lambda (pattern input result subst output)
+	   (let ((name (sprintf "~A  ~A  ~A  ~A" pattern input result subst)))
+	     (cond
+	      ((equal? "c" result)
+	       (test-error name (matcher pattern input)))
+	      ((equal? "n" result)
+	       (test-assert name (not (matcher pattern input))))
+	      (else
+	       (test-equal name output
+		     (subst-matches (matcher pattern input) subst))))))
+	 splt)
+	(warning "invalid regex test line" line))))
 
 (test-begin)
 
@@ -151,40 +153,43 @@
 
 (for-each
  (lambda (opts)
-   (test-group (sprintf "irregex/chunked - ~S" opts)
-     (with-input-from-file "re-tests.txt"
-       (lambda ()
-         (port-for-each
-          (lambda (line)
-            (match (string-split line "\t" #t)
-              ((pattern input result subst output)
-               (let ((name
-                      (sprintf "~A  ~A  ~A  ~A" pattern input result subst)))
-                 (cond
-                  ((equal? "c" result))
-                  ((equal? "n" result)
-                   (for-each
-                    (lambda (rope)
-                      (test-assert name
-                        (not (irregex-search/chunked pattern
-                                                     rope-chunker
-                                                     rope))))
-                    (append (make-ropes input)
-                            (make-shared-ropes input))))
-                  (else
-                   (for-each
-                    (lambda (rope)
-                      (test name output
-                        (subst-matches (irregex-search/chunked pattern
-                                                               rope-chunker
-                                                               rope)
-                                       subst)))
-                    (append (make-ropes input)
-                            (make-shared-ropes input)))))))
-              (else
-               (warning "invalid regex test line" line)))
-            )
-          read-line)))))
+   (test-group
+    (sprintf "irregex/chunked - ~S" opts)
+    (with-input-from-file "re-tests.txt"
+      (lambda ()
+	(port-for-each
+	 (lambda (line)
+	   (let ((splt (string-split line "\t" #t)))
+	     (if (list? splt)
+		 (apply 
+		  (lambda (pattern input result subst output)
+		    (let ((name
+			   (sprintf "~A  ~A  ~A  ~A" pattern input result subst)))
+		      (cond
+		       ((equal? "c" result))
+		       ((equal? "n" result)
+			(for-each
+			 (lambda (rope)
+			   (test-assert name
+					(not (irregex-search/chunked pattern
+								     rope-chunker
+								     rope))))
+			 (append (make-ropes input)
+				 (make-shared-ropes input))))
+		       (else
+			(for-each
+			 (lambda (rope)
+			   (test-equal
+			    name output
+			    (subst-matches (irregex-search/chunked pattern
+								   rope-chunker
+								   rope)
+					   subst)))
+			 (append (make-ropes input)
+				 (make-shared-ropes input)))))))
+		  splt)
+		 (warning "invalid regex test line" line))))
+	 read-line)))))
  '((backtrack)
    (fast)
    ))
@@ -252,32 +257,32 @@
   (test-assert (irregex-match-data? (irregex-match "a.*b" "axxxb")))
   (test-assert (not (irregex-match-data? (vector '*irregex-match-tag* #f #f #f #f #f #f #f #f #f))))
   (test-assert (not (irregex-match-data? (vector #f #f #f #f #f #f #f #f #f #f #f))))
-  (test 0 (irregex-num-submatches (irregex "a.*b")))
-  (test 1 (irregex-num-submatches (irregex "a(.*)b")))
-  (test 2 (irregex-num-submatches (irregex "(a(.*))b")))
-  (test 2 (irregex-num-submatches (irregex "a(.*)(b)")))
-  (test 10 (irregex-num-submatches (irregex "((((((((((a))))))))))")))
-  (test 0 (irregex-match-num-submatches (irregex-search "a.*b" "axxxb")))
-  (test 1 (irregex-match-num-submatches (irregex-search "a(.*)b" "axxxb")))
-  (test 2 (irregex-match-num-submatches (irregex-search "(a(.*))b" "axxxb")))
-  (test 2 (irregex-match-num-submatches (irregex-search "a(.*)(b)" "axxxb")))
-  (test 10 (irregex-match-num-submatches (irregex-search "((((((((((a))))))))))" "a")))
+  (test-equal 0 (irregex-num-submatches (irregex "a.*b")))
+  (test-equal 1 (irregex-num-submatches (irregex "a(.*)b")))
+  (test-equal 2 (irregex-num-submatches (irregex "(a(.*))b")))
+  (test-equal 2 (irregex-num-submatches (irregex "a(.*)(b)")))
+  (test-equal 10 (irregex-num-submatches (irregex "((((((((((a))))))))))")))
+  (test-equal 0 (irregex-match-num-submatches (irregex-search "a.*b" "axxxb")))
+  (test-equal 1 (irregex-match-num-submatches (irregex-search "a(.*)b" "axxxb")))
+  (test-equal 2 (irregex-match-num-submatches (irregex-search "(a(.*))b" "axxxb")))
+  (test-equal 2 (irregex-match-num-submatches (irregex-search "a(.*)(b)" "axxxb")))
+  (test-equal 10 (irregex-match-num-submatches (irregex-search "((((((((((a))))))))))" "a")))
   )
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (test-group "utils"
-  (test "h*llo world"
+  (test-equal "h*llo world"
       (irregex-replace "[aeiou]" "hello world" "*"))
-  (test "h*ll* w*rld"
+  (test-equal "h*ll* w*rld"
       (irregex-replace/all "[aeiou]" "hello world" "*"))
-  (test '("bob@test.com" "fred@example.com")
+  (test-equal '("bob@test.com" "fred@example.com")
       (irregex-fold 'email
                     (lambda (i m s) (cons (irregex-match-substring m) s))
                     '()
                     "bob@test.com and fred@example.com"
                     (lambda (i s) (reverse s))))
-  (test '("bob@test.com" "fred@example.com")
+  (test-equal '("bob@test.com" "fred@example.com")
       (irregex-fold/chunked
        'email
        (lambda (src i m s) (cons (irregex-match-substring m) s))
diff --git a/tests/test.scm b/tests/test.scm
index e9b43c14..c16de6a5 100644
--- a/tests/test.scm
+++ b/tests/test.scm
@@ -77,7 +77,8 @@
 (define-syntax test-equal
   (syntax-rules ()
     ((_ name expr value eq) (run-equal name (lambda () expr) value eq))
-    ((_ name expr value) (run-equal name (lambda () expr) value equal?))))
+    ((_ name expr value) (run-equal name (lambda () expr) value equal?))
+    ((_ expr value) (run-equal (->string value) (lambda () expr) value equal?))))
 
 (define-syntax test-error
   (syntax-rules ()
@@ -89,7 +90,8 @@
 
 (define-syntax test-assert
   (syntax-rules ()
-    ((_ name expr) (run-equal name (lambda () (if expr #t #f)) #t eq?))))
+    ((_ name expr) (run-equal name (lambda () (if expr #t #f)) #t eq?))
+    ((_ expr) (run-equal (->string expr) (lambda () (if expr #t #f)) #t eq?))))
 
 (define-syntax test-group
   (syntax-rules ()
Trap