~ chicken-core (chicken-5) 8a516a042542ad4245d79eecc23643e765d61a0e


commit 8a516a042542ad4245d79eecc23643e765d61a0e
Author:     felix <felix@call-with-current-continuation.org>
AuthorDate: Thu Aug 19 03:41:21 2010 -0400
Commit:     felix <felix@call-with-current-continuation.org>
CommitDate: Thu Aug 19 03:41:21 2010 -0400

    manually applied patch between experimental and total-irregex

diff --git a/chicken-install.scm b/chicken-install.scm
index dcedaa7c..35974439 100644
--- a/chicken-install.scm
+++ b/chicken-install.scm
@@ -25,13 +25,13 @@
 
 
 (require-library setup-download setup-api)
-(require-library srfi-1 posix data-structures utils regex ports extras srfi-13 files)
+(require-library srfi-1 posix data-structures utils irregex ports extras srfi-13 files)
 (require-library chicken-syntax)	; in case an import library reexports chicken syntax
 (require-library chicken-ffi-syntax)	; same reason, also for filling modules.db
 
 (module main ()
 
-  (import scheme chicken srfi-1 posix data-structures utils regex ports extras
+  (import scheme chicken srfi-1 posix data-structures utils irregex ports extras
           srfi-13 files)
   (import setup-download setup-api)
 
@@ -51,7 +51,6 @@
       "srfi-13.import.so"
       "srfi-69.import.so"
       "extras.import.so"
-      "regex.import.so"
       "srfi-14.import.so"
       "tcp.import.so"
       "foreign.import.so"
@@ -515,17 +514,17 @@
     (let* ((files (glob (make-pathname (repository-path) "*.import.*")))
            (tmpdir (create-temporary-directory))
            (dbfile (make-pathname tmpdir +module-db+))
-           (rx (regexp ".*/([^/]+)\\.import\\.(scm|so)")))
+           (rx (irregex ".*/([^/]+)\\.import\\.(scm|so)")))
       (print "loading import libraries ...")
       (fluid-let ((##sys#warnings-enabled #f))
         (for-each
          (lambda (f)
-           (let ((m (string-match rx f)))
+           (let ((m (irregex-match rx f)))
 	     (handle-exceptions ex
 		 (print-error-message 
 		  ex (current-error-port) 
 		  (sprintf "Failed to import from `~a'" f))
-	       (eval `(import ,(string->symbol (cadr m)))))))
+	       (eval `(import ,(string->symbol (irregex-match-substring m 1)))))))
          files))
       (print "generating database")
       (let ((db
@@ -613,10 +612,10 @@ EOF
 
   (define (setup-proxy uri)
     (if (string? uri)
-        (cond ((string-match "(.+)\\:([0-9]+)" uri) =>
+        (cond ((irregex-match "(.+)\\:([0-9]+)" uri) =>
                (lambda (m)
-                 (set! *proxy-host* (cadr m))
-                 (set! *proxy-port* (string->number (caddr m))))
+                 (set! *proxy-host* (irregex-match-substring m 1))
+                 (set! *proxy-port* (string->number (irregex-match-substring m 2))))
                (else
                 (set! *proxy-host* uri)
                 (set! *proxy-port* 80))))))
@@ -625,7 +624,7 @@ EOF
 
   (define (main args)
     (let ((update #f)
-          (rx (regexp "([^:]+):(.+)")))
+          (rx (irregex "([^:]+):(.+)")))
       (setup-proxy (get-environment-variable "http_proxy"))
       (let loop ((args args) (eggs '()))
         (cond ((null? args)
@@ -776,9 +775,14 @@ EOF
                               "")
                              *eggs+dirs+vers*))
                           (loop (cdr args) (cons egg eggs))))
-                       ((string-match rx arg) =>
+                       ((irregex-match rx arg) =>
                         (lambda (m)
-                          (loop (cdr args) (alist-cons (cadr m) (caddr m) eggs))))
+                          (loop 
+			   (cdr args) 
+			   (alist-cons
+			    (irregex-match-substring m 1)
+			    (irregex-match-substring m 2)
+			    eggs))))
                        (else (loop (cdr args) (cons arg eggs))))))))))
 
   (register-feature! 'chicken-install)
diff --git a/chicken-status.scm b/chicken-status.scm
index 5222ebb0..cbd246c3 100644
--- a/chicken-status.scm
+++ b/chicken-status.scm
@@ -24,13 +24,13 @@
 ; POSSIBILITY OF SUCH DAMAGE.
 
 
-(require-library setup-api srfi-1 posix data-structures utils ports regex files)
+(require-library setup-api srfi-1 posix data-structures utils ports irregex files)
 
 
 (module main ()
   
   (import scheme chicken foreign)
-  (import srfi-1 posix data-structures utils ports regex
+  (import srfi-1 posix data-structures utils ports irregex
 	  files setup-api)
 
   (define-foreign-variable C_TARGET_LIB_HOME c-string)
@@ -45,6 +45,9 @@
 	(make-pathname C_TARGET_LIB_HOME (sprintf "chicken/~a" C_BINARY_VERSION))
 	(repository-path)))
 
+  (define (grep rx lst)
+    (filter (cut irregex-search rx <>) lst))
+
   (define (gather-eggs patterns)
     (let ((eggs (map pathname-file 
 		     (glob (make-pathname (repo-path) "*" "setup-info")))))
@@ -122,11 +125,10 @@ EOF
 		   (lambda ()
 		     (let* ((patterns
 			     (map
-			      regexp
+			      irregex
 			      (cond ((null? pats) '(".*"))
-				    ;;XXX change for total-irregex branch:
 				    (exact (map (lambda (p)
-						  (string-append "^" (regexp-escape p) "$"))
+						  (string-append "^" (irregex-quote p) "$"))
 						pats))
 				    (else pats))))
 			    (eggs (gather-eggs patterns)))
diff --git a/chicken-uninstall.scm b/chicken-uninstall.scm
index 8a074b4d..bd6b35ee 100644
--- a/chicken-uninstall.scm
+++ b/chicken-uninstall.scm
@@ -26,14 +26,14 @@
 
 (require-library
  setup-api
- srfi-1 posix data-structures utils ports regex srfi-13 files)
+ srfi-1 posix data-structures utils ports irregex srfi-13 files)
 
 
 (module main ()
   
   (import scheme chicken foreign)
   (import setup-api)
-  (import srfi-1 posix data-structures utils ports regex srfi-13 files)
+  (import srfi-1 posix data-structures utils ports irregex srfi-13 files)
 
   (define-foreign-variable C_TARGET_LIB_HOME c-string)
   (define-foreign-variable C_BINARY_VERSION int)
@@ -49,6 +49,9 @@
 
   (define *force* #f)
 
+  (define (grep rx lst)
+    (filter (cut irregex-search rx <>) lst))
+
   (define (gather-eggs patterns)
     (let ((eggs (map pathname-file 
 		     (glob (make-pathname (repo-path) "*" "setup-info")))))
@@ -117,8 +120,8 @@ EOF
 		 (map
 		  (lambda (p)
 		    (if exact
-			(regexp (string-append "^" (regexp-escape p) "$"))
-			(glob->regexp p)))
+			(irregex (string-append "^" (irregex-quote p) "$"))
+			(##sys#glob->regexp p)))
 		  pats))))
 	      (else
 	       (let ((arg (car args)))
diff --git a/defaults.make b/defaults.make
index e4e8537b..4564b952 100644
--- a/defaults.make
+++ b/defaults.make
@@ -293,7 +293,7 @@ CSI ?= csi$(EXE)
 
 # Scheme compiler flags
 
-CHICKEN_OPTIONS = -optimize-level 2 -include-path . -include-path $(SRCDIR) -inline -ignore-repository
+CHICKEN_OPTIONS = -optimize-level 2 -include-path . -include-path $(SRCDIR) -inline -ignore-repository -feature building-chicken
 ifdef DEBUGBUILD
 CHICKEN_OPTIONS += -feature debugbuild -scrutinize -types $(SRCDIR)types.db
 else
@@ -321,12 +321,9 @@ CHICKEN_INSTALL_PROGRAM = $(PROGRAM_PREFIX)chicken-install$(PROGRAM_SUFFIX)
 CHICKEN_UNINSTALL_PROGRAM = $(PROGRAM_PREFIX)chicken-uninstall$(PROGRAM_SUFFIX)
 CHICKEN_STATUS_PROGRAM = $(PROGRAM_PREFIX)chicken-status$(PROGRAM_SUFFIX)
 CHICKEN_BUG_PROGRAM = $(PROGRAM_PREFIX)chicken-bug$(PROGRAM_SUFFIX)
-IMPORT_LIBRARIES = chicken lolevel srfi-1 srfi-4 data-structures ports files posix srfi-13 srfi-69 extras \
-	regex srfi-14 tcp foreign scheme srfi-18 utils csi irregex
+IMPORT_LIBRARIES = chicken lolevel srfi-1 srfi-4 data-structures ports files posix srfi-13 srfi-69 extras srfi-14 tcp foreign scheme srfi-18 utils csi irregex
 IMPORT_LIBRARIES += setup-api setup-download
-SCRUTINIZED_LIBRARIES = library eval data-structures ports files extras lolevel utils tcp srfi-1 srfi-4 srfi-13 \
-       srfi-14 srfi-18 srfi-69 $(POSIXFILE) regex scheduler \
-       profiler stub expand chicken-syntax chicken-ffi-syntax
+SCRUTINIZED_LIBRARIES = library eval data-structures ports files extras lolevel utils tcp srfi-1 srfi-4 srfi-13 srfi-14 srfi-18 srfi-69 $(POSIXFILE) irregex scheduler profiler stub expand chicken-syntax chicken-ffi-syntax
 
 ifdef STATICBUILD
 CHICKEN_STATIC_EXECUTABLE = $(CHICKEN_PROGRAM)$(EXE)
diff --git a/distribution/manifest b/distribution/manifest
index 6c2ceb82..1c7a9a8e 100644
--- a/distribution/manifest
+++ b/distribution/manifest
@@ -28,7 +28,7 @@ optimizer.c
 compiler-syntax.c
 scrutinizer.c
 unboxing.c
-regex.c
+irregex.c
 posixunix.c
 posixwin.c
 profiler.c
@@ -77,8 +77,9 @@ optimizer.scm
 compiler-syntax.scm
 scrutinizer.scm
 unboxing.scm
-regex.scm
 irregex.scm
+irregex-core.scm
+irregex-utils.scm
 posixunix.scm
 posixwin.scm
 posix-common.scm
@@ -210,7 +211,6 @@ posix.import.scm
 srfi-13.import.scm
 srfi-69.import.scm
 extras.import.scm
-regex.import.scm
 irregex.import.scm
 srfi-14.import.scm
 tcp.import.scm
@@ -224,7 +224,6 @@ posix.import.c
 srfi-13.import.c
 srfi-69.import.c
 extras.import.c
-regex.import.c
 irregex.import.c
 srfi-14.import.c
 tcp.import.c
@@ -290,7 +289,7 @@ manual/Unit library
 manual/Unit lolevel
 manual/Unit ports
 manual/Unit posix
-manual/Unit regex
+manual/Unit irregex
 manual/Unit srfi-1
 manual/Unit srfi-13
 manual/Unit srfi-14
diff --git a/eval.scm b/eval.scm
index 0d8bb4fb..c3d98c6c 100644
--- a/eval.scm
+++ b/eval.scm
@@ -55,7 +55,7 @@
 (define-foreign-variable binary-version int "C_BINARY_VERSION")
 
 (define ##sys#core-library-modules
-  '(extras lolevel utils files tcp regex posix srfi-1 srfi-4 srfi-13 
+  '(extras lolevel utils files tcp irregex posix srfi-1 srfi-4 srfi-13 
 	   srfi-14 srfi-18 srfi-69 data-structures ports chicken-syntax
 	   chicken-ffi-syntax))
 
diff --git a/files.scm b/files.scm
index 3e8b0d63..630808a4 100644
--- a/files.scm
+++ b/files.scm
@@ -36,7 +36,8 @@
 
 (declare
   (unit files)
-  (uses regex data-structures)
+  (uses irregex data-structures)
+  (fixnum)
   (hide chop-pds absolute-pathname-root root-origin root-directory split-directory)
   (disable-interrupts) 
   (foreign-declare #<<EOF
@@ -171,20 +172,19 @@ EOF
 (define root-origin)
 (define root-directory)
 
-(let ((string-match string-match))
-  (if ##sys#windows-platform
-      (let ((rx (regexp "([A-Za-z]:)?([\\/\\\\]).*")))
-        (set! absolute-pathname-root (lambda (pn) (string-match rx pn)))
-        (set! root-origin (lambda (rt) (and rt (cadr rt))))
-        (set! root-directory (lambda (rt) (and rt (caddr rt)))) )
-      (let ((rx (regexp "([\\/\\\\]).*")))
-        (set! absolute-pathname-root (lambda (pn) (string-match rx pn)))
-        (set! root-origin (lambda (rt) #f))
-        (set! root-directory (lambda (rt) (and rt (cadr rt)))) ) ) )
+(if ##sys#windows-platform
+    (let ((rx (irregex "([A-Za-z]:)?([\\/\\\\]).*")))
+      (set! absolute-pathname-root (lambda (pn) (irregex-match rx pn)))
+      (set! root-origin (lambda (rt) (and rt (irregex-match-substring rt 1))))
+      (set! root-directory (lambda (rt) (and rt (irregex-match-substring rt 2)))) )
+    (let ((rx (irregex "([\\/\\\\]).*")))
+      (set! absolute-pathname-root (lambda (pn) (irregex-match rx pn)))
+      (set! root-origin (lambda (rt) #f))
+      (set! root-directory (lambda (rt) (and rt (irregex-match-substring rt 1)))) ) )
 
 (define (absolute-pathname? pn)
   (##sys#check-string pn 'absolute-pathname?)
-  (pair? (absolute-pathname-root pn)) )
+  (irregex-match-data? (absolute-pathname-root pn)) )
 
 (define-inline (*char-pds? ch) (memq ch '(#\\ #\/)))
 
@@ -261,28 +261,33 @@ EOF
        file ext def-pds) ) ) )
 
 (define decompose-pathname
-  (let ((string-match string-match))
-    (let* ([patt1 "^(.*[\\/\\\\])?([^\\/\\\\]+)(\\.([^\\/\\\\.]+))$"]
-	   [patt2 "^(.*[\\/\\\\])?((\\.)?[^\\/\\\\]+)$"]
-	   [rx1 (regexp patt1)]
-	   [rx2 (regexp patt2)]
-	   [strip-pds
-	     (lambda (dir)
-	       (and dir
-		    (if (member dir '("/" "\\"))
-		        dir
-		        (chop-pds dir #f) ) ) )] )
-      (lambda (pn)
-        (##sys#check-string pn 'decompose-pathname)
-        (if (fx= 0 (##sys#size pn))
-	    (values #f #f #f)
-	    (let ([ms (string-match rx1 pn)])
-	      (if ms
-		  (values (strip-pds (cadr ms)) (caddr ms) (car (cddddr ms)))
-		  (let ([ms (string-match rx2 pn)])
-		    (if ms
-		        (values (strip-pds (cadr ms)) (caddr ms) #f)
-		        (values (strip-pds pn) #f #f) ) ) ) ) ) ) ) ) )
+  (let* ([patt1 "^(.*[\\/\\\\])?([^\\/\\\\]+)(\\.([^\\/\\\\.]+))$"]
+	 [patt2 "^(.*[\\/\\\\])?((\\.)?[^\\/\\\\]+)$"]
+	 [rx1 (irregex patt1)]
+	 [rx2 (irregex patt2)]
+	 [strip-pds
+	  (lambda (dir)
+	    (and dir
+		 (if (member dir '("/" "\\"))
+		     dir
+		     (chop-pds dir #f) ) ) )] )
+    (lambda (pn)
+      (##sys#check-string pn 'decompose-pathname)
+      (if (fx= 0 (##sys#size pn))
+	  (values #f #f #f)
+	  (let ([ms (irregex-search rx1 pn)])
+	    (if ms
+		(values 
+		 (strip-pds (irregex-match-substring ms 1))
+		 (irregex-match-substring ms 2)
+		 (irregex-match-substring ms 4))
+		(let ([ms (irregex-search rx2 pn)])
+		  (if ms
+		      (values
+		       (strip-pds (irregex-match-substring ms 1))
+		       (irregex-match-substring ms 2) 
+		       #f)
+		      (values (strip-pds pn) #f #f) ) ) ) ) ) ) ) )
 
 (define pathname-directory)
 (define pathname-file)
@@ -337,7 +342,11 @@ EOF
 (define create-temporary-file)
 (define create-temporary-directory)
 
-(let ((call-with-output-file call-with-output-file)
+(let ((get-environment-variable get-environment-variable)
+      (make-pathname make-pathname)
+      (file-exists? file-exists?)
+      (directory-exists? directory-exists?)
+      (call-with-output-file call-with-output-file) 
       (temp #f)
       (temp-prefix "temp"))
   (define (tempdir)
diff --git a/irregex-core.scm b/irregex-core.scm
new file mode 100644
index 00000000..0baeb1a3
--- /dev/null
+++ b/irregex-core.scm
@@ -0,0 +1,3896 @@
+;;;; irregex.scm -- IrRegular Expressions
+;;
+;; Copyright (c) 2005-2010 Alex Shinn.  All rights reserved.
+;; BSD-style license: http://synthcode.com/license.txt
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; At this moment there was a loud ring at the bell, and I could
+;; hear Mrs. Hudson, our landlady, raising her voice in a wail of
+;; expostulation and dismay.
+;;
+;; "By heaven, Holmes," I said, half rising, "I believe that
+;; they are really after us."
+;;
+;; "No, it's not quite so bad as that.  It is the unofficial
+;; force, -- the Baker Street irregulars."
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Notes
+;;
+;; This code should not require any porting - it should work out of
+;; the box in any R[45]RS Scheme implementation.  Slight modifications
+;; are needed for R6RS (a separate R6RS-compatible version is included
+;; in the distribution as irregex-r6rs.scm).
+;;
+;; The goal of portability makes this code a little clumsy and
+;; inefficient.  Future versions will include both cleanup and
+;; performance tuning, but you can only go so far while staying
+;; portable.  AND-LET*, SRFI-9 records and custom macros would've been
+;; nice.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; History
+;;
+;; 0.8.2: 2010/08/03 - (...)? submatch extraction fix and alternate
+;;                     named submatches from Peter Bex
+;;                     Added irregex-match-valid-index? to export list
+;;                     and made it accept named submatches.  The procedures
+;;                     irregex-match-{start,end}-{index,chunk} now also
+;;                     accept named submatches, with the index argument
+;;                     made optional.  Improved argument type checks.
+;;                     Disallow negative submatch index.
+;; 0.8.1: 2010/03/09 - backtracking irregex-match fix and other small fixes
+;; 0.8.0: 2010/01/20 - optimizing DFA compilation, adding SRE escapes
+;;                     inside PCREs, adding utility SREs
+;; 0.7.5: 2009/08/31 - adding irregex-extract and irregex-split
+;;                     *-fold copies match data (use *-fold/fast for speed)
+;;                     irregex-opt now returns an SRE
+;; 0.7.4: 2009/05/14 - empty alternates (or) and empty csets always fail,
+;;                     bugfix in default finalizer for irregex-fold/chunked
+;; 0.7.3: 2009/04/14 - adding irregex-fold/chunked, minor doc fixes
+;; 0.7.2: 2009/02/11 - some bugfixes, much improved documentation
+;; 0.7.1: 2008/10/30 - several bugfixes (thanks to Derick Eddington)
+;; 0.7.0: 2008/10/20 - support abstract chunked strings
+;; 0.6.2: 2008/07/26 - minor bugfixes, allow global disabling of utf8 mode,
+;;                     friendlier error messages in parsing, \Q..\E support
+;; 0.6.1: 2008/07/21 - added utf8 mode, more utils, bugfixes
+;;   0.6: 2008/05/01 - most of PCRE supported
+;;   0.5: 2008/04/24 - fully portable R4RS, many PCRE features implemented
+;;   0.4: 2008/04/17 - rewriting NFA to use efficient closure compilation,
+;;                     normal strings only, but all of the spencer tests pass
+;;   0.3: 2008/03/10 - adding DFA converter (normal strings only)
+;;   0.2: 2005/09/27 - adding irregex-opt (like elisp's regexp-opt) utility
+;;   0.1: 2005/08/18 - simple NFA interpreter over abstract chunked strings
+
+
+(define (%irregex-error arg1 . args)
+  (apply 
+   error 
+   (if (symbol? loc1)
+       (cons (string-append (symbol->string arg1) ": " (car args))
+	     (cdr args))
+       args)))
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Data Structures
+
+(cond-expand
+  (building-chicken
+   (begin
+     (define-syntax (internal x r c)
+       `(,(with-input-from-string (cadr x) read) ,@(cddr x)))
+     ;; make-irregex defined elsewhere
+     (define (irregex? x)
+       (internal "##sys#structure?" x 'regexp))
+     (define (irregex-dfa x)
+       (internal "##sys#check-structure" x 'regexp 'irregex-dfa)
+       (internal "##sys#slot" x 1))
+     (define (irregex-dfa/search x)
+       (internal "##sys#check-structure" x 'regexp 'irregex-dfa/search)
+       (internal "##sys#slot" x 2))
+     (define (irregex-dfa/extract x)
+       (internal "##sys#check-structure" x 'regexp 'irregex-dfa/extract)
+       (internal "##sys#slot" x 3))
+     (define (irregex-nfa x)
+       (internal "##sys#check-structure" x 'regexp 'irregex-nfa)
+       (internal "##sys#slot" x 4))
+     (define (irregex-flags x)
+       (internal "##sys#check-structure" x 'regexp 'irregex-flags)
+       (internal "##sys#slot" x 5))
+     (define (irregex-num-submatches x)
+       (internal "##sys#check-structure" x 'regexp 'irregex-num-submatches)
+       (internal "##sys#slot" x 6))
+     (define (irregex-lengths x)
+       (internal "##sys#check-structure" x 'regexp 'irregex-lengths)
+       (internal "##sys#slot" x 7))
+     (define (irregex-names x)
+       (internal "##sys#check-structure" x 'regexp 'irregex-names)
+       (internal "##sys#slot" x 8))
+     ;; make-irregex-match defined elsewhere
+     (define (irregex-new-matches irx)
+       (make-irregex-match (irregex-num-submatches irx) (irregex-names irx)))
+     (define (irregex-reset-matches! m)
+       (let ((v (internal "##sys#slot" m 1)))
+	 (vector-fill! v #f)
+	 m))
+     (define (irregex-copy-matches m)
+       (and (internal "##sys#structure?" m 'regexp-match)
+	    (internal
+	     "##sys#make-structure"
+	     'regexp-match
+	     (let* ((v (internal "##sys#slot" m 1))
+		    (v2 (make-vector (internal "##sys#size" v))))
+	       (vector-copy! v v2)
+	       v2)
+	     (internal "##sys#slot" m 2)
+	     (internal "##sys#slot" m 3)
+	     (internal "##sys#slot" m 4))))
+     (define (irregex-match-data? obj)
+       (internal "##sys#structure?" obj 'regexp-match))
+     (define (irregex-match-num-submatches m)
+       (internal "##sys#check-structure" m 'regexp-match 'irregex-match-num-submatches)
+       (- (fx/ (internal "##sys#size" (internal "##sys#slot" m 1)) 4) 2))
+     (define (irregex-match-chunker m)
+       (internal "##sys#slot" m 3))
+     (define (irregex-match-names m)
+       (internal "##sys#check-structure" m 'regexp-match 'irregex-match-names)
+       (internal "##sys#slot" m 2))
+     (define (irregex-match-chunker-set! m str)
+       (internal "##sys#setslot" m 3 str))
+     (define-inline (%irregex-match-start-chunk m n)
+       (internal "##sys#slot" (internal "##sys#slot" m 1) (* n 4)))
+     (define-inline (%irregex-match-start-index m n)
+       (internal "##sys#slot" (internal "##sys#slot" m 1) (+ 1 (* n 4))))
+     (define-inline (%irregex-match-end-chunk m n)
+       (internal "##sys#slot" (internal "##sys#slot" m 1) (+ 2 (* n 4))))
+     (define (%irregex-match-end-index m n)
+       (internal "##sys#slot" (internal "##sys#slot" m 1) (+ 3 (* n 4))))
+     (define (%irregex-match-fail m) (internal "##sys#slot" m 4))
+     (define (%irregex-match-fail-set! m x) (internal "##sys#setslot" m 4 x))
+     (define-record-printer (regexp-match m out)
+       (let ((n (irregex-match-num-submatches m)))
+	 (display "#<regexp-match (" out)
+	 (display n out)
+	 (display " submatches)>" out)))
+     (define-inline (irregex-match-valid-numeric-index? m n)
+       (let ((v (internal "##sys#slot" m 1)))
+	 (and (>= n 0) (< (* n 4) (internal "##sys#size" v))
+	      (internal "##sys#slot" v (+ 1 (* n 4))))))))
+  (else
+   (begin
+     (define irregex-tag '*irregex-tag*)
+     (define (make-irregex dfa dfa/search dfa/extract nfa flags
+			   submatches lengths names)
+       (vector irregex-tag dfa dfa/search dfa/extract nfa flags
+	       submatches lengths names))
+     (define (irregex? obj)
+       (and (vector? obj)
+	    (= 9 (vector-length obj))
+	    (eq? irregex-tag (vector-ref obj 0))))
+     (define (irregex-dfa x) (vector-ref x 1))
+     (define (irregex-dfa/search x) (vector-ref x 2))
+     (define (irregex-dfa/extract x) (vector-ref x 3))
+     (define (irregex-nfa x) (vector-ref x 4))
+     (define (irregex-flags x) (vector-ref x 5))
+     (define (irregex-num-submatches x) (vector-ref x 6))
+     (define (irregex-lengths x) (vector-ref x 7))
+     (define (irregex-names x) (vector-ref x 8))
+     (define (irregex-new-matches irx)
+       (make-irregex-match (irregex-num-submatches irx) (irregex-names irx)))
+     (define (irregex-reset-matches! m)
+       (do ((i (- (vector-length m) 1) (- i 1)))
+	   ((<= i 3) m)
+	 (vector-set! m i #f)))
+     (define (irregex-copy-matches m)
+       (and (vector? m)
+	    (let ((r (make-vector (vector-length m))))
+	      (do ((i (- (vector-length m) 1) (- i 1)))
+		  ((< i 0) r)
+		(vector-set! r i (vector-ref m i))))))
+     (define irregex-match-tag '*irregex-match-tag*)
+     (define (irregex-match-data? obj)
+       (and (vector? obj)
+	    (>= (vector-length obj) 11)
+	    (eq? irregex-match-tag (vector-ref obj 0))))
+     (define (make-irregex-match count names)
+       (let ((res (make-vector (+ (* 4 (+ 2 count)) 4) #f)))
+	 (vector-set! res 0 irregex-match-tag)
+	 (vector-set! res 2 names)
+	 res))
+     (define (irregex-match-num-submatches m)
+       (- (quotient (- (vector-length m) 3) 4) 2))
+     (define (irregex-match-chunker m)
+       (vector-ref m 1))
+     (define (irregex-match-names m)
+       (vector-ref m 2))
+     (define (irregex-match-chunker-set! m str)
+       (vector-set! m 1 str))
+     (define (%irregex-match-start-chunk m n) (vector-ref m (+ 3 (* n 4))))
+     (define (%irregex-match-start-index m n) (vector-ref m (+ 4 (* n 4))))
+     (define (%irregex-match-end-chunk m n)   (vector-ref m (+ 5 (* n 4))))
+     (define (%irregex-match-end-index m n)   (vector-ref m (+ 6 (* n 4))))
+     (define (%irregex-match-fail m) (vector-ref m (- (vector-length m) 1)))
+     (define (%irregex-match-fail-set! m x) (vector-set! m (- (vector-length m) 1) x))
+     (define (irregex-match-valid-numeric-index? m n)
+       (and (>= n 0) (< (+ 3 (* n 4)) (vector-length m))
+	    (vector-ref m (+ 4 (* n 4))))))))
+
+(define (irregex-match-valid-named-index? m n)
+  (and (assq n (irregex-match-names m))
+       #t))
+
+;; public interface with error checking
+(define (irregex-match-start-chunk m . opt)
+  (let ((n (irregex-match-numeric-index 'irregex-match-start-chunk m opt #t)))
+    (%irregex-match-start-chunk m n)))
+(define (irregex-match-start-index m . opt)
+  (let ((n (irregex-match-numeric-index 'irregex-match-start-index m opt #t)))
+    (%irregex-match-start-index m n)))
+(define (irregex-match-end-chunk m . opt)
+  (let ((n (irregex-match-numeric-index 'irregex-match-end-chunk m opt #t)))
+    (%irregex-match-end-chunk m n)))
+(define (irregex-match-end-index m . opt)
+  (let ((n (irregex-match-numeric-index 'irregex-match-end-index m opt #t)))
+    (%irregex-match-end-index m n)))
+
+(define (irregex-match-start-chunk-set! m n start)
+  (vector-set! m (+ 3 (* n 4)) start))
+(define (irregex-match-start-index-set! m n start)
+  (vector-set! m (+ 4 (* n 4)) start))
+(define (irregex-match-end-chunk-set! m n end)
+  (vector-set! m (+ 5 (* n 4)) end))
+(define (irregex-match-end-index-set! m n end)
+  (vector-set! m (+ 6 (* n 4)) end))
+
+;; Helper procedure to convert any type of index from a rest args list
+;; to a numeric index.  Named submatches are converted to their corresponding
+;; numeric index, and numeric submatches are checked for validity.
+;; If strict? is true, an error is raised for invalid numeric indices.
+;; #f is returned if strict? is false, but unknown named submatches always
+;; cause an error, regardless of strict?ness
+(define (irregex-match-numeric-index location m opt strict?)
+  (cond
+   ((not (irregex-match-data? m))
+    (%irregex-error location "not match data" m))
+   ((not (pair? opt)) 0)
+   ((pair? (cdr opt))
+    (apply %irregex-error location "too many arguments" m opt))
+   (else
+    (let ((n (car opt)))
+      (if (number? n)
+          (or (and (irregex-match-valid-numeric-index? m n) n)
+              (and strict?
+                   (%irregex-error location "not a valid index" m n)))
+          (let lp ((ls (irregex-match-names m))
+                   (unknown? #t))
+            (cond
+             ((null? ls)
+              (and unknown?
+                   (%irregex-error location "unknown match name" n)))
+             ((eq? n (caar ls))
+              (if (%irregex-match-start-chunk m (cdar ls))
+                  (cdar ls)
+                  (lp (cdr ls) #f)))
+             (else (lp (cdr ls) unknown?)))))))))
+
+(define (irregex-match-valid-index? m n)
+  (if (not (irregex-match-data? m))
+      (%irregex-error 'irregex-match-valid-index? "not match data" m))
+  (if (integer? n)
+      (irregex-match-valid-numeric-index? m n)
+      (irregex-match-valid-named-index? m n)))
+
+(define (irregex-match-substring m . opt)
+  (let* ((n (irregex-match-numeric-index 'irregex-match-substring m opt #f))
+         (cnk (irregex-match-chunker m)))
+    (and n
+         ((chunker-get-substring cnk)
+          (%irregex-match-start-chunk m n)
+          (%irregex-match-start-index m n)
+          (%irregex-match-end-chunk m n)
+          (%irregex-match-end-index m n)))))
+
+(define (irregex-match-subchunk m . opt)
+  (let* ((n (irregex-match-numeric-index 'irregex-match-subchunk m opt #f))
+         (cnk (irregex-match-chunker m))
+         (get-subchunk (chunker-get-subchunk cnk)))
+    (if (not get-subchunk)
+        (%irregex-error "this chunk type does not support match subchunks" m n)
+        (and n (get-subchunk
+                (%irregex-match-start-chunk m n)
+                (%irregex-match-start-index m n)
+                (%irregex-match-end-chunk m n)
+                (%irregex-match-end-index m n))))))
+
+;; chunkers tell us how to navigate through chained chunks of strings
+
+(define (make-irregex-chunker get-next get-str . o)
+  (let* ((get-start (or (and (pair? o) (car o)) (lambda (cnk) 0)))
+         (o (if (pair? o) (cdr o) o))
+         (get-end (or (and (pair? o) (car o))
+                      (lambda (cnk) (string-length (get-str cnk)))))
+         (o (if (pair? o) (cdr o) o))
+         (get-substr
+          (or (and (pair? o) (car o))
+              (lambda (cnk1 start cnk2 end)
+                (if (eq? cnk1 cnk2)
+                    (substring (get-str cnk1) start end)
+                    (let loop ((cnk (get-next cnk1))
+                               (res (list (substring (get-str cnk1)
+                                                     start
+                                                     (get-end cnk1)))))
+                      (if (eq? cnk cnk2)
+                          (string-cat-reverse
+                           (cons (substring (get-str cnk)
+                                            (get-start cnk)
+                                            end)
+                                 res))
+                          (loop (get-next cnk)
+                                (cons (substring (get-str cnk)
+                                                 (get-start cnk)
+                                                 (get-end cnk))
+                                      res))))))))
+         (o (if (pair? o) (cdr o) o))
+         (get-subchunk (and (pair? o) (car o))))
+    (if (not (and (procedure? get-next) (procedure? get-str)
+                  (procedure? get-start) (procedure? get-substr)))
+        (%irregex-error 'make-irregex-chunker "expected a procdure"))
+    (vector get-next get-str get-start get-end get-substr get-subchunk)))
+
+(define (chunker-get-next cnk) (vector-ref cnk 0))
+(define (chunker-get-str cnk) (vector-ref cnk 1))
+(define (chunker-get-start cnk) (vector-ref cnk 2))
+(define (chunker-get-end cnk) (vector-ref cnk 3))
+(define (chunker-get-substring cnk) (vector-ref cnk 4))
+(define (chunker-get-subchunk cnk) (vector-ref cnk 5))
+
+(define (chunker-prev-chunk cnk start end)
+  (if (eq? start end)
+      #f
+      (let ((get-next (chunker-get-next cnk)))
+        (let lp ((start start))
+          (let ((next (get-next start)))
+            (if (eq? next end)
+                start
+                (and next (lp next))))))))
+
+(define (chunker-prev-char cnk start end)
+  (let ((prev (chunker-prev-chunk cnk start end)))
+    (and prev
+         (string-ref ((chunker-get-str cnk) prev)
+                     (- ((chunker-get-end cnk) prev) 1)))))
+
+(define (chunker-next-char cnk src)
+  (let ((next ((chunker-get-next cnk) src)))
+    (and next
+         (string-ref ((chunker-get-str cnk) next)
+                     ((chunker-get-start cnk) next)))))
+
+(define (chunk-before? cnk a b)
+  (and (not (eq? a b))
+       (let ((next ((chunker-get-next cnk) a)))
+         (and next
+              (if (eq? next b)
+                  #t
+                  (chunk-before? cnk next b))))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; String Utilities
+
+;; Unicode version (skip surrogates)
+(define *all-chars*
+  `(/ ,(integer->char 0) ,(integer->char #xD7FF)
+      ,(integer->char #xE000) ,(integer->char #x10FFFF)))
+
+;; ASCII version, offset to not assume 0-255
+;; (define *all-chars* `(/ ,(integer->char (- (char->integer #\space) 32)) ,(integer->char (+ (char->integer #\space) 223))))
+
+;; set to #f to ignore even an explicit request for utf8 handling
+(define *allow-utf8-mode?* #t)
+
+;; (define *named-char-properties* '())
+
+(define (string-scan-char str c . o)
+  (let ((end (string-length str)))
+    (let scan ((i (if (pair? o) (car o) 0)))
+      (cond ((= i end) #f)
+            ((eqv? c (string-ref str i)) i)
+            (else (scan (+ i 1)))))))
+
+(define (string-scan-char-escape str c . o)
+  (let ((end (string-length str)))
+    (let scan ((i (if (pair? o) (car o) 0)))
+      (cond ((= i end) #f)
+            ((eqv? c (string-ref str i)) i)
+            ((eqv? c #\\) (scan (+ i 2)))
+            (else (scan (+ i 1)))))))
+
+(define (string-scan-pred str pred . o)
+  (let ((end (string-length str)))
+    (let scan ((i (if (pair? o) (car o) 0)))
+      (cond ((= i end) #f)
+            ((pred (string-ref str i)) i)
+            (else (scan (+ i 1)))))))
+
+(define (string-split-char str c)
+  (let ((end (string-length str)))
+    (let lp ((i 0) (from 0) (res '()))
+      (define (collect) (cons (substring str from i) res))
+      (cond ((>= i end) (reverse (collect)))
+            ((eqv? c (string-ref str i)) (lp (+ i 1) (+ i 1) (collect)))
+            (else (lp (+ i 1) from res))))))
+
+(define (char-alphanumeric? c)
+  (or (char-alphabetic? c) (char-numeric? c)))
+
+(define (%substring=? a b start1 start2 len)
+  (let lp ((i 0))
+    (cond ((>= i len)
+	  #t)
+	 ((char=? (string-ref a (+ start1 i)) (string-ref b (+ start2 i)))
+          (lp (+ i 1)))
+         (else
+          #f))))
+
+;; SRFI-13 extracts
+
+(define (%%string-copy! to tstart from fstart fend)
+  (do ((i fstart (+ i 1))
+       (j tstart (+ j 1)))
+      ((>= i fend))
+    (string-set! to j (string-ref from i))))
+
+(define (string-cat-reverse string-list)
+  (string-cat-reverse/aux
+   (fold (lambda (s a) (+ (string-length s) a)) 0 string-list)
+   string-list))
+
+(define (string-cat-reverse/aux len string-list)
+  (let ((res (make-string len)))
+    (let lp ((i len) (ls string-list))
+      (if (pair? ls)
+	  (let* ((s (car ls))
+		 (slen (string-length s))
+		 (i (- i slen)))
+	    (%%string-copy! res i s 0 slen)
+	    (lp i (cdr ls)))))
+    res))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; List Utilities
+
+;; like the one-arg IOTA case
+(define (zero-to n)
+  (if (<= n 0)
+      '()
+      (let lp ((i (- n 1)) (res '()))
+        (if (zero? i) (cons 0 res) (lp (- i 1) (cons i res))))))
+
+;; take the head of list FROM up to but not including TO, which must
+;; be a tail of the list
+(define (take-up-to from to)
+  (let lp ((ls from) (res '()))
+    (if (and (pair? ls) (not (eq? ls to)))
+        (lp (cdr ls) (cons (car ls) res))
+        (reverse res))))
+
+;; SRFI-1 extracts (simplified 1-ary versions)
+
+(define (find pred ls)
+  (let lp ((ls ls))
+    (cond ((null? ls) #f)
+	  ((pred (car ls)) (car ls))
+	  (else (lp (cdr ls))))))
+
+(define (find-tail pred ls)
+  (let lp ((ls ls))
+    (cond ((null? ls) #f)
+	  ((pred (car ls)) ls)
+	  (else (lp (cdr ls))))))
+
+(define (last ls)
+  (if (not (pair? ls))
+      (%irregex-error "can't take last of empty list")
+      (let lp ((ls ls))
+        (if (pair? (cdr ls))
+            (lp (cdr ls))
+            (car ls)))))
+
+(define (any pred ls)
+  (and (pair? ls)
+       (let lp ((head (car ls)) (tail (cdr ls)))
+         (if (null? tail)
+             (pred head)
+             (or (pred head) (lp (car tail) (cdr tail)))))))
+
+(define (every pred ls)
+  (or (null? ls)
+      (let lp ((head (car ls))  (tail (cdr ls)))
+        (if (null? tail)
+            (pred head)
+            (and (pred head) (lp (car tail) (cdr tail)))))))
+
+(define (fold kons knil ls)
+  (let lp ((ls ls) (res knil))
+    (if (null? ls)
+        res
+        (lp (cdr ls) (kons (car ls) res)))))
+
+(define (filter pred ls)
+  (let lp ((ls ls) (res '()))
+    (if (null? ls)
+        (reverse res)
+        (lp (cdr ls) (if (pred (car ls)) (cons (car ls) res) res)))))
+
+(define (remove pred ls)
+  (let lp ((ls ls) (res '()))
+    (if (null? ls)
+        (reverse res)
+        (lp (cdr ls) (if (pred (car ls)) res (cons (car ls) res))))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Flags
+
+(define (bit-shr n i)
+  (quotient n (expt 2 i)))
+
+(define (bit-shl n i)
+  (* n (expt 2 i)))
+  
+(define (bit-not n) (- #xFFFF n))
+
+(define (bit-ior a b)
+  (cond
+   ((zero? a) b)
+   ((zero? b) a)
+   (else
+    (+ (if (or (odd? a) (odd? b)) 1 0)
+       (* 2 (bit-ior (quotient a 2) (quotient b 2)))))))
+
+(define (bit-and a b)
+  (cond
+   ((zero? a) 0)
+   ((zero? b) 0)
+   (else
+    (+ (if (and (odd? a) (odd? b)) 1 0)
+       (* 2 (bit-and (quotient a 2) (quotient b 2)))))))
+
+(define (integer-log n)
+  (define (b8 n r)
+    (if (>= n (bit-shl 1 8)) (b4 (bit-shr n 8) (+ r 8)) (b4 n r)))
+  (define (b4 n r)
+    (if (>= n (bit-shl 1 4)) (b2 (bit-shr n 4) (+ r 4)) (b2 n r)))
+  (define (b2 n r)
+    (if (>= n (bit-shl 1 2)) (b1 (bit-shr n 2) (+ r 2)) (b1 n r)))
+  (define (b1 n r) (if (>= n (bit-shl 1 1)) (+ r 1) r))
+  (if (>= n (bit-shl 1 16)) (b8 (bit-shr n 16) 16) (b8 n 0)))
+
+(define (flag-set? flags i)
+  (= i (bit-and flags i)))
+(define (flag-join a b)
+  (if b (bit-ior a b) a))
+(define (flag-clear a b)
+  (bit-and a (bit-not b)))
+
+(define ~none 0)
+(define ~searcher? 1)
+(define ~consumer? 2)
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Parsing Embedded SREs in PCRE Strings
+
+;; (define (with-read-from-string str i proc)
+;;   (define (port-size in)
+;;     (let lp ((i 0)) (if (eof-object? (read-char in)) i (lp (+ i 1)))))
+;;   (let* ((len (string-length str))
+;;          (tail-len (- len i))
+;;          (in (open-input-string (substring str i len)))
+;;          (sre (read in))
+;;          (unused-len (port-size in)))
+;;     (close-input-port in)
+;;     (proc sre (- tail-len unused-len))))
+
+(define close-token (list 'close))
+(define dot-token (string->symbol "."))
+
+(define (with-read-from-string str i proc)
+  (define end (string-length str))
+  (define (read i k)
+    (cond
+     ((>= i end) (%irregex-error "unterminated embedded SRE" str))
+     (else
+      (case (string-ref str i)
+        ((#\()
+         (let lp ((i (+ i 1)) (ls '()))
+           (read
+            i
+            (lambda (x j)
+              (cond
+               ((eq? x close-token)
+                (k (reverse ls) j))
+               ((eq? x dot-token)
+                (if (null? ls)
+                    (%irregex-error "bad dotted form" str)
+                    (read j (lambda (y j2)
+                              (read j2 (lambda (z j3)
+                                         (if (not (eq? z close-token))
+                                             (%irregex-error "bad dotted form" str)
+                                             (k (append (reverse (cdr ls))
+                                                        (cons (car ls) y))
+                                                j3))))))))
+               (else
+                (lp j (cons x ls))))))))
+        ((#\))
+         (k close-token (+ i 1)))
+        ((#\;)
+         (let skip ((i (+ i 1)))
+           (if (or (>= i end) (eqv? #\newline (string-ref str i)))
+               (read (+ i 1) k)
+               (skip (+ i 1)))))
+        ((#\' #\`)
+         (read (+ i 1)
+           (lambda (sexp j)
+             (let ((q (if (eqv? #\' (string-ref str i)) 'quote 'quasiquote)))
+               (k (list q sexp) j)))))
+        ((#\,)
+         (let* ((at? (and (< (+ i 1) end) (eqv? #\@ (string-ref str (+ i 1)))))
+                (u (if at? 'uquote-splicing 'unquote))
+                (j (if at? (+ i 2) (+ i 1))))
+           (read j (lambda (sexp j) (k (list u sexp) j)))))
+        ((#\")
+         (let scan ((from (+ i 1)) (i (+ i 1)) (res '()))
+           (define (collect)
+             (if (= from i) res (cons (substring str from i) res)))
+           (if (>= i end)
+               (%irregex-error "unterminated string in embedded SRE" str)
+               (case (string-ref str i)
+                 ((#\") (k (string-cat-reverse (collect)) (+ i 1)))
+                 ((#\\) (scan (+ i 1) (+ i 2) (collect)))
+                 (else (scan from (+ i 1) res))))))
+        ((#\#)
+         (case (string-ref str (+ i 1))
+           ((#\;)
+            (read (+ i 2) (lambda (sexp j) (read j k))))
+           ((#\\)
+            (read (+ i 2)
+              (lambda (sexp j)
+                (k (case sexp
+                     ((space) #\space)
+                     ((newline) #\newline)
+                     (else (let ((s (if (number? sexp)
+                                        (number->string sexp)
+                                        (symbol->string sexp))))
+                             (string-ref s 0))))
+                   j))))
+           ((#\t #\f)
+            (k (eqv? #\t (string-ref str (+ i 1))) (+ i 2)))
+           (else
+            (%irregex-error "bad # syntax in simplified SRE" i))))
+        (else
+         (cond
+          ((char-whitespace? (string-ref str i))
+           (read (+ i 1) k))
+          (else ;; symbol/number
+           (let scan ((j (+ i 1)))
+             (cond
+              ((or (>= j end)
+                   (let ((c (string-ref str j)))
+                     (or (char-whitespace? c)
+                         (memv c '(#\; #\( #\) #\" #\# #\\)))))
+               (let ((str2 (substring str i j)))
+                 (k (or (string->number str2) (string->symbol str2)) j)))
+              (else (scan (+ j 1))))))))))))
+  (read i (lambda (res j)
+            (if (eq? res 'close-token)
+                (%irregex-error "unexpected ')' in SRE" str j)
+                (proc res j)))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Parsing PCRE Strings
+
+(define ~save? 1)
+(define ~case-insensitive? 2)
+(define ~multi-line? 4)
+(define ~single-line? 8)
+(define ~ignore-space? 16)
+(define ~utf8? 32)
+
+(define (symbol-list->flags ls)
+  (let lp ((ls ls) (res ~none))
+    (if (not (pair? ls))
+        res
+        (lp (cdr ls)
+            (flag-join
+             res
+             (case (car ls)
+               ((i ci case-insensitive) ~case-insensitive?)
+               ((m multi-line) ~multi-line?)
+               ((s single-line) ~single-line?)
+               ((x ignore-space) ~ignore-space?)
+               ((u utf8) (if *allow-utf8-mode?* ~utf8? ~none))
+               (else #f)))))))
+
+(define (maybe-string->sre obj)
+  (if (string? obj) (string->sre obj) obj))
+
+(define (string->sre str . o)
+  (if (not (string? str)) (%irregex-error 'string->sre "expected a string" str))
+  (let ((end (string-length str))
+        (flags (symbol-list->flags o)))
+
+    (let lp ((i 0) (from 0) (flags flags) (res '()) (st '()))
+
+      ;; handle case sensitivity at the literal char/string level
+      (define (cased-char ch)
+        (if (and (flag-set? flags ~case-insensitive?)
+                 (char-alphabetic? ch))
+            `(or ,ch ,(char-altcase ch))
+            ch))
+      (define (cased-string str)
+        (if (flag-set? flags ~case-insensitive?)
+            (sre-sequence (map cased-char (string->list str)))
+            str))
+      ;; accumulate the substring from..i as literal text
+      (define (collect)
+        (if (= i from) res (cons (cased-string (substring str from i)) res)))
+      ;; like collect but breaks off the last single character when
+      ;; collecting literal data, as the argument to ?/*/+ etc.
+      (define (collect/single)
+        (let* ((utf8? (flag-set? flags ~utf8?))
+               (j (if (and utf8? (> i 1))
+                      (utf8-backup-to-initial-char str (- i 1))
+                      (- i 1))))
+          (cond
+           ((< j from)
+            res)
+           (else
+            (let ((c (cased-char (if utf8?
+                                     (utf8-string-ref str j (- i j))
+                                     (string-ref str j)))))
+              (cond
+               ((= j from)
+                (cons c res))
+               (else
+                (cons c
+                      (cons (cased-string (substring str from j))
+                            res)))))))))
+      ;; collects for use as a result, reversing and grouping OR
+      ;; terms, and some ugly tweaking of `function-like' groups and
+      ;; conditionals
+      (define (collect/terms)
+        (let* ((ls (collect))
+               (func
+                (and (pair? ls)
+                     (memq (last ls)
+                           '(atomic if look-ahead neg-look-ahead
+                                    look-behind neg-look-behind
+                                    => submatch-named
+                                    w/utf8 w/noutf8))))
+               (prefix (if (and func (memq (car func) '(=> submatch-named)))
+                           (list 'submatch-named (cadr (reverse ls)))
+                           (and func (list (car func)))))
+               (ls (if func
+                       (if (memq (car func) '(=> submatch-named))
+                           (reverse (cddr (reverse ls)))
+                           (reverse (cdr (reverse ls))))
+                       ls)))
+          (let lp ((ls ls) (term '()) (res '()))
+            (define (shift)
+              (cons (sre-sequence term) res))
+            (cond
+             ((null? ls)
+              (let* ((res (sre-alternate (shift)))
+                     (res (if (flag-set? flags ~save?)
+                              (list 'submatch res)
+                              res)))
+                (if prefix
+                    (if (eq? 'if (car prefix))
+                        (cond
+                         ((not (pair? res))
+                          'epsilon)
+                         ((memq (car res)
+                                '(look-ahead neg-look-ahead
+                                             look-behind neg-look-behind))
+                          res)
+                         ((eq? 'seq (car res))
+                          `(if ,(cadr res)
+                               ,(if (pair? (cdr res))
+                                    (sre-sequence (cddr res))
+                                    'epsilon)))
+                         (else
+                          `(if ,(cadadr res)
+                               ,(if (pair? (cdr res))
+                                    (sre-sequence (cddadr res))
+                                    'epsilon)
+                               ,(sre-alternate
+                                 (if (pair? (cdr res)) (cddr res) '())))))
+                        `(,@prefix ,res))
+                    res)))
+             ((eq? 'or (car ls)) (lp (cdr ls) '() (shift)))
+             (else (lp (cdr ls) (cons (car ls) term) res))))))
+      (define (save)
+        (cons (cons flags (collect)) st))
+
+      ;; main parsing
+      (if (>= i end)
+          (if (pair? st)
+              (%irregex-error "unterminated parenthesis in regexp" str)
+              (collect/terms))
+          (let ((c (string-ref str i)))
+            (case c
+              ((#\.)
+               (lp (+ i 1) (+ i 1) flags
+                   (cons (if (flag-set? flags ~single-line?) 'any 'nonl)
+                         (collect))
+                   st))
+              ((#\?)
+               (let ((res (collect/single)))
+                 (if (null? res)
+                     (%irregex-error "? can't follow empty pattern" str res)
+                     (let ((x (car res)))
+                       (lp (+ i 1)
+                           (+ i 1)
+                           flags
+                           (cons
+                            (if (pair? x)
+                                (case (car x)
+                                  ((*)  `(*? ,@(cdr x)))
+                                  ((+)  `(**? 1 #f ,@(cdr x)))
+                                  ((?)  `(?? ,@(cdr x)))
+                                  ((**) `(**? ,@(cdr x)))
+                                  ((=)  `(**? ,(cadr x) ,@(cdr x)))
+                                  ((>=)  `(**? ,(cadr x) #f ,@(cddr x)))
+                                  (else `(? ,x)))
+                                `(? ,x))
+                            (cdr res))
+                           st)))))
+              ((#\+ #\*)
+               (let* ((res (collect/single))
+                      (x (if (pair? res) (car res) 'epsilon))
+                      (op (string->symbol (string c))))
+                 (cond
+                  ((sre-repeater? x)
+                   (%irregex-error "duplicate repetition (e.g. **) in pattern" str res))
+                  ((sre-empty? x)
+                   (%irregex-error "can't repeat empty pattern (e.g. ()*)" str res))
+                  (else
+                   (lp (+ i 1) (+ i 1) flags
+                       (cons (list op x) (cdr res))
+                       st)))))
+              ((#\()
+               (cond
+                ((>= (+ i 1) end)
+                 (%irregex-error "unterminated parenthesis in regexp" str))
+                ((not (memv (string-ref str (+ i 1)) '(#\? #\*))) ; normal case
+                 (lp (+ i 1) (+ i 1) (flag-join flags ~save?) '() (save)))
+                ((>= (+ i 2) end)
+                 (%irregex-error "unterminated parenthesis in regexp" str))
+                ((eqv? (string-ref str (+ i 1)) #\*)
+                 (if (eqv? #\' (string-ref str (+ i 2)))
+                     (with-read-from-string str (+ i 3)
+                       (lambda (sre j)
+                         (if (or (>= j end) (not (eqv? #\) (string-ref str j))))
+                             (%irregex-error "unterminated (*'...) SRE escape" str)
+                             (lp (+ j 1) (+ j 1) flags (cons sre (collect)) st))))
+                     (%irregex-error "bad regexp syntax: (*FOO) not supported" str)))
+                (else                   ;; (?...) case
+                 (case (string-ref str (+ i 2))
+                   ((#\#)
+                    (let ((j (string-scan-char str #\) (+ i 3))))
+                      (lp (+ j i) (+ j 1) flags (collect) st)))
+                   ((#\:)
+                    (lp (+ i 3) (+ i 3) (flag-clear flags ~save?) '() (save)))
+                   ((#\=)
+                    (lp (+ i 3) (+ i 3) (flag-clear flags ~save?)
+                        '(look-ahead) (save)))
+                   ((#\!)
+                    (lp (+ i 3) (+ i 3) (flag-clear flags ~save?)
+                        '(neg-look-ahead) (save)))
+                   ((#\<)
+                    (cond
+                     ((>= (+ i 3) end)
+                      (%irregex-error "unterminated parenthesis in regexp" str))
+                     (else
+                      (case (string-ref str (+ i 3))
+                        ((#\=)
+                         (lp (+ i 4) (+ i 4) (flag-clear flags ~save?)
+                             '(look-behind) (save)))
+                        ((#\!)
+                         (lp (+ i 4) (+ i 4) (flag-clear flags ~save?)
+                             '(neg-look-behind) (save)))
+                        (else
+                         (let ((j (and (char-alphabetic?
+                                        (string-ref str (+ i 3)))
+                                       (string-scan-char str #\> (+ i 4)))))
+                           (if j
+                               (lp (+ j 1) (+ j 1) (flag-clear flags ~save?)
+                                   `(,(string->symbol (substring str (+ i 3) j))
+                                     submatch-named)
+                                   (save))
+                               (%irregex-error "invalid (?< sequence" str))))))))
+                   ((#\>)
+                    (lp (+ i 3) (+ i 3) (flag-clear flags ~save?)
+                        '(atomic) (save)))
+                   ;;((#\' #\P) ; named subpatterns
+                   ;; )
+                   ;;((#\R) ; recursion
+                   ;; )
+                   ((#\()
+                    (cond
+                     ((>= (+ i 3) end)
+                      (%irregex-error "unterminated parenthesis in regexp" str))
+                     ((char-numeric? (string-ref str (+ i 3)))
+                      (let* ((j (string-scan-char str #\) (+ i 3)))
+                             (n (string->number (substring str (+ i 3) j))))
+                        (if (not n)
+                            (%irregex-error "invalid conditional reference" str)
+                            (lp (+ j 1) (+ j 1) (flag-clear flags ~save?)
+                                `(,n if) (save)))))
+                     ((char-alphabetic? (string-ref str (+ i 3)))
+                      (let* ((j (string-scan-char str #\) (+ i 3)))
+                             (s (string->symbol (substring str (+ i 3) j))))
+                        (lp (+ j 1) (+ j 1) (flag-clear flags ~save?)
+                            `(,s if) (save))))
+                     (else
+                      (lp (+ i 2) (+ i 2) (flag-clear flags ~save?)
+                          '(if) (save)))))
+                   ((#\{)
+                    (%irregex-error "unsupported Perl-style cluster" str))
+                   (else
+                    (let ((old-flags flags))
+                      (let lp2 ((j (+ i 2)) (flags flags) (invert? #f))
+                        (define (join x)
+                          ((if invert? flag-clear flag-join) flags x))
+                        (define (new-res res)
+                          (let ((before (flag-set? old-flags ~utf8?))
+                                (after (flag-set? flags ~utf8?)))
+                            (if (eq? before after)
+                                res
+                                (cons (if after 'w/utf8 'w/noutf8) res))))
+                        (cond
+                         ((>= j end)
+                          (%irregex-error "incomplete cluster" str i))
+                         (else
+                          (case (string-ref str j)
+                            ((#\i)
+                             (lp2 (+ j 1) (join ~case-insensitive?) invert?))
+                            ((#\m)
+                             (lp2 (+ j 1) (join ~multi-line?) invert?))
+                            ((#\x)
+                             (lp2 (+ j 1) (join ~ignore-space?) invert?))
+                            ((#\u)
+                             (if *allow-utf8-mode?*
+                                 (lp2 (+ j 1) (join ~utf8?) invert?)
+                                 (lp2 (+ j 1) flags invert?)))
+                            ((#\-)
+                             (lp2 (+ j 1) flags (not invert?)))
+                            ((#\))
+                             (lp (+ j 1) (+ j 1) flags (new-res (collect))
+                                 st))
+                            ((#\:)
+                             (lp (+ j 1) (+ j 1) flags (new-res '())
+                                 (cons (cons old-flags (collect)) st)))
+                            (else
+                             (%irregex-error "unknown regex cluster modifier" str)
+                             )))))))))))
+              ((#\))
+               (if (null? st)
+                   (%irregex-error "too many )'s in regexp" str)
+                   (lp (+ i 1)
+                       (+ i 1)
+                       (caar st)
+                       (cons (collect/terms) (cdar st))
+                       (cdr st))))
+              ((#\[)
+               (apply
+                (lambda (sre j)
+                  (lp (+ j 1) (+ j 1) flags (cons sre (collect)) st))
+                (string-parse-cset str (+ i 1) flags)))
+              ((#\{)
+               (cond
+                ((or (>= (+ i 1) end)
+                     (not (or (char-numeric? (string-ref str (+ i 1)))
+                              (eqv? #\, (string-ref str (+ i 1))))))
+                 (lp (+ i 1) from flags res st))
+                (else
+                 (let ((res (collect/single)))
+                   (cond
+                    ((null? res)
+                     (%irregex-error "{ can't follow empty pattern"))
+                    (else
+                     (let* ((x (car res))
+                            (tail (cdr res))
+                            (j (string-scan-char str #\} (+ i 1)))
+                            (s2 (string-split-char (substring str (+ i 1) j)
+                                                   #\,))
+                            (n (string->number (car s2)))
+                            (m (and (pair? (cdr s2))
+                                    (string->number (cadr s2)))))
+                       (cond
+                        ((or (not n)
+                             (and (pair? (cdr s2))
+                                  (not (equal? "" (cadr s2)))
+                                  (not m)))
+                         (%irregex-error "invalid {n} repetition syntax" s2))
+                        ((null? (cdr s2))
+                         (lp (+ j 1) (+ j 1) flags `((= ,n ,x) ,@tail) st))
+                        (m
+                         (lp (+ j 1) (+ j 1) flags `((** ,n ,m ,x) ,@tail) st))
+                        (else
+                         (lp (+ j 1) (+ j 1) flags `((>= ,n ,x) ,@tail) st)
+                         )))))))))
+              ((#\\)
+               (cond
+                ((>= (+ i 1) end)
+                 (%irregex-error "incomplete escape sequence" str))
+                (else
+                 (let ((c (string-ref str (+ i 1))))
+                   (case c
+                     ((#\d)
+                      (lp (+ i 2) (+ i 2) flags `(numeric ,@(collect)) st))
+                     ((#\D)
+                      (lp (+ i 2) (+ i 2) flags `((~ numeric) ,@(collect)) st))
+                     ((#\s)
+                      (lp (+ i 2) (+ i 2) flags `(space ,@(collect)) st))
+                     ((#\S)
+                      (lp (+ i 2) (+ i 2) flags `((~ space) ,@(collect)) st))
+                     ((#\w)
+                      (lp (+ i 2) (+ i 2) flags
+                          `((or alphanumeric ("_")) ,@(collect)) st))
+                     ((#\W)
+                      (lp (+ i 2) (+ i 2) flags
+                          `((~ (or alphanumeric ("_"))) ,@(collect)) st))
+                     ((#\b)
+                      (lp (+ i 2) (+ i 2) flags
+                          `((or bow eow) ,@(collect)) st))
+                     ((#\B)
+                      (lp (+ i 2) (+ i 2) flags `(nwb ,@(collect)) st))
+                     ((#\A)
+                      (lp (+ i 2) (+ i 2) flags `(bos ,@(collect)) st))
+                     ((#\Z)
+                      (lp (+ i 2) (+ i 2) flags
+                          `((? #\newline) eos ,@(collect)) st))
+                     ((#\z)
+                      (lp (+ i 2) (+ i 2) flags `(eos ,@(collect)) st))
+                     ((#\R)
+                      (lp (+ i 2) (+ i 2) flags `(newline ,@(collect)) st))
+                     ((#\K)
+                      (lp (+ i 2) (+ i 2) flags `(reset ,@(collect)) st))
+                     ;; these two are from Emacs and TRE, but not in PCRE
+                     ((#\<)
+                      (lp (+ i 2) (+ i 2) flags `(bow ,@(collect)) st))
+                     ((#\>)
+                      (lp (+ i 2) (+ i 2) flags `(eow ,@(collect)) st))
+                     ((#\x)
+                      (apply
+                       (lambda (ch j)
+                         (lp (+ j 1) (+ j 1) flags `(,ch ,@(collect)) st))
+                       (string-parse-hex-escape str (+ i 2) end)))
+                     ((#\k)
+                      (let ((c (string-ref str (+ i 2))))
+                        (if (not (memv c '(#\< #\{ #\')))
+                            (%irregex-error "bad \\k usage, expected \\k<...>" str)
+                            (let* ((terminal (char-mirror c))
+                                   (j (string-scan-char str terminal (+ i 2)))
+                                   (s (and j (substring str (+ i 3) j)))
+                                   (backref
+                                    (if (flag-set? flags ~case-insensitive?)
+                                        'backref-ci
+                                        'backref)))
+                              (if (not j)
+                                  (%irregex-error "unterminated named backref" str)
+                                  (lp (+ j 1) (+ j 1) flags
+                                      `((,backref ,(string->symbol s))
+                                        ,@(collect))
+                                      st))))))
+                     ((#\Q)  ;; \Q..\E escapes
+                      (let ((res (collect)))
+                        (let lp2 ((j (+ i 2)))
+                          (cond
+                           ((>= j end)
+                            (lp j (+ i 2) flags res st))
+                           ((eqv? #\\ (string-ref str j))
+                            (cond
+                             ((>= (+ j 1) end)
+                              (lp (+ j 1) (+ i 2) flags res st))
+                             ((eqv? #\E (string-ref str (+ j 1)))
+                              (lp (+ j 2) (+ j 2) flags
+                                  (cons (substring str (+ i 2) j) res) st))
+                             (else
+                              (lp2 (+ j 2)))))
+                           (else
+                            (lp2 (+ j 1)))))))
+                     ((#\')
+                      (with-read-from-string str (+ i 2)
+                       (lambda (sre j)
+                         (lp j j flags (cons sre (collect)) st))))
+                     ;;((#\p)  ; XXXX unicode properties
+                     ;; )
+                     ;;((#\P)
+                     ;; )
+                     (else
+                      (cond
+                       ((char-numeric? c)
+                        (let* ((j (or (string-scan-pred
+                                       str
+                                       (lambda (c) (not (char-numeric? c)))
+                                       (+ i 2))
+                                      end))
+                               (backref
+                                (if (flag-set? flags ~case-insensitive?)
+                                    'backref-ci
+                                    'backref))
+                               (res `((,backref ,(string->number
+                                                  (substring str (+ i 1) j)))
+                                      ,@(collect))))
+                          (lp j j flags res st)))
+                       ((char-alphabetic? c)
+                        (let ((cell (assv c posix-escape-sequences)))
+                          (if cell
+                              (lp (+ i 2) (+ i 2) flags
+                                  (cons (cdr cell) (collect)) st)
+                              (%irregex-error "unknown escape sequence" str c))))
+                       (else
+                        (lp (+ i 2) (+ i 1) flags (collect) st)))))))))
+              ((#\|)
+               (lp (+ i 1) (+ i 1) flags (cons 'or (collect)) st))
+              ((#\^)
+               (let ((sym (if (flag-set? flags ~multi-line?) 'bol 'bos)))
+                 (lp (+ i 1) (+ i 1) flags (cons sym (collect)) st)))
+              ((#\$)
+               (let ((sym (if (flag-set? flags ~multi-line?) 'eol 'eos)))
+                 (lp (+ i 1) (+ i 1) flags (cons sym (collect)) st)))
+              ((#\space)
+               (if (flag-set? flags ~ignore-space?)
+                   (lp (+ i 1) (+ i 1) flags (collect) st)
+                   (lp (+ i 1) from flags res st)))
+              ((#\#)
+               (if (flag-set? flags ~ignore-space?)
+                   (let ((j (or (string-scan-char str #\newline (+ i 1))
+                                (- end 1))))
+                     (lp (+ j 1) (+ j 1) flags (collect) st))
+                   (lp (+ i 1) from flags res st)))
+              (else
+               (lp (+ i 1) from flags res st))))))))
+
+(define posix-escape-sequences
+  `((#\n . #\newline)
+    (#\r . ,(integer->char (+ (char->integer #\newline) 3)))
+    (#\t . ,(integer->char (- (char->integer #\newline) 1)))
+    (#\a . ,(integer->char (- (char->integer #\newline) 3)))
+    (#\e . ,(integer->char (+ (char->integer #\newline) #x11)))
+    (#\f . ,(integer->char (+ (char->integer #\newline) 2)))
+    ))
+
+(define (char-altcase c)
+  (if (char-upper-case? c) (char-downcase c) (char-upcase c)))
+
+(define (char-mirror c)
+  (case c ((#\<) #\>) ((#\{) #\}) ((#\() #\)) ((#\[) #\]) (else c)))
+
+(define (string-parse-hex-escape str i end)
+  (cond
+   ((>= i end)
+    (%irregex-error "incomplete hex escape" str i))
+   ((eqv? #\{ (string-ref str i))
+    (let ((j (string-scan-char-escape str #\} (+ i 1))))
+      (if (not j)
+          (%irregex-error "incomplete hex brace escape" str i)
+          (let* ((s (substring str (+ i 1) j))
+                 (n (string->number s 16)))
+            (if n
+                (list (integer->char n) j)
+                (%irregex-error "bad hex brace escape" s))))))
+   ((>= (+ i 1) end)
+    (%irregex-error "incomplete hex escape" str i))
+   (else
+    (let* ((s (substring str i (+ i 2)))
+           (n (string->number s 16)))
+      (if n
+          (list (integer->char n) (+ i 2))
+          (%irregex-error "bad hex escape" s))))))
+
+(define (string-parse-cset str start flags)
+  (let* ((end (string-length str))
+         (invert? (and (< start end) (eqv? #\^ (string-ref str start))))
+         (utf8? (flag-set? flags ~utf8?)))
+    (define (go i chars ranges)
+      (if (>= i end)
+          (%irregex-error "incomplete char set" str i end)
+          (let ((c (string-ref str i)))
+            (case c
+              ((#\])
+               (if (and (null? chars) (null? ranges))
+                   (go (+ i 1) (cons #\] chars) ranges)
+                   (let ((ci? (flag-set? flags ~case-insensitive?))
+                         (hi-chars (if utf8? (filter high-char? chars) '()))
+                         (chars (if utf8? (remove high-char? chars) chars)))
+                     (list
+                      ((lambda (res)
+                         (if invert? (cons '~ res) (sre-alternate res)))
+                       (append
+                        hi-chars
+                        (if (pair? chars)
+                            (list
+                             (list (list->string
+                                    ((if ci?
+                                         cset-case-insensitive
+                                         (lambda (x) x))
+                                     (reverse chars)))))
+                            '())
+                        (if (pair? ranges)
+                            (let ((res (if ci?
+                                           (cset-case-insensitive
+                                            (reverse ranges))
+                                           (reverse ranges))))
+                              (list (cons '/ (alist->plist res))))
+                            '())))
+                      i))))
+              ((#\-)
+               (cond
+                ((or (= i start)
+                     (and (= i (+ start 1)) (eqv? #\^ (string-ref str start)))
+                     (eqv? #\] (string-ref str (+ i 1))))
+                 (go (+ i 1) (cons c chars) ranges))
+                ((null? chars)
+                 (%irregex-error "bad char-set"))
+                (else
+                 (let* ((c1 (car chars))
+                        (c2 (string-ref str (+ i 1))))
+                   (apply
+                    (lambda (c2 j)
+                      (if (char<? c2 c1)
+                          (%irregex-error "inverted range in char-set" c1 c2)
+                          (go j (cdr chars) (cons (cons c1 c2) ranges))))
+                    (cond
+                     ((and (eqv? #\\ c2) (assv c2 posix-escape-sequences))
+                      => (lambda (x) (list (cdr x) (+ i 3))))
+                     ((and (eqv? #\\ c2)
+                           (eqv? (string-ref str (+ i 2)) #\x))
+                      (string-parse-hex-escape str (+ i 3) end))
+                     ((and utf8? (<= #x80 (char->integer c2) #xFF))
+                      (let ((len (utf8-start-char->length c2)))
+                        (list (utf8-string-ref str (+ i 1) len) (+ i 1 len))))
+                     (else
+                      (list c2 (+ i 2)))))))))
+              ((#\[)
+               (let* ((inv? (eqv? #\^ (string-ref str (+ i 1))))
+                      (i2 (if inv? (+ i 2) (+ i 1))))
+                 (case (string-ref str i2)
+                   ((#\:)
+                    (let ((j (string-scan-char str #\: (+ i2 1))))
+                      (if (or (not j) (not (eqv? #\] (string-ref str (+ j 1)))))
+                          (%irregex-error "incomplete character class" str)
+                          (let* ((cset (sre->cset
+                                        (string->symbol
+                                         (substring str (+ i2 1) j))))
+                                 (cset (if inv? (cset-complement cset) cset)))
+                            (go (+ j 2)
+                                (append (filter char? cset) chars)
+                                (append (filter pair? cset) ranges))))))
+                   ((#\= #\.)
+                    (%irregex-error "collating sequences not supported" str))
+                   (else
+                    (go (+ i 1) (cons #\[ chars) ranges)))))
+              ((#\\)
+               (let ((c (string-ref str (+ i 1))))
+                 (case c
+                   ((#\d #\D #\s #\S #\w #\W)
+                    (let ((cset (sre->cset (string->sre (string #\\ c)))))
+                      (go (+ i 2)
+                          (append (filter char? cset) chars)
+                          (append (filter pair? cset) ranges))))
+                   ((#\x)
+                    (apply
+                     (lambda (ch j)
+                       (go j (cons ch chars) ranges))
+                     (string-parse-hex-escape str (+ i 2) end)))
+                   (else
+                    (let ((c (cond ((assv c posix-escape-sequences) => cdr)
+                                   (else c))))
+                      (go (+ i 2)
+                          (cons (string-ref str (+ i 1)) (cons c chars))
+                          ranges))))))
+              (else
+               (if (and utf8? (<= #x80 (char->integer c) #xFF))
+                   (let ((len (utf8-start-char->length c)))
+                     (go (+ i len)
+                         (cons (utf8-string-ref str i len) chars)
+                         ranges))
+                   (go (+ i 1) (cons c chars) ranges)))))))
+    (if invert?
+        (go (+ start 1)
+            (if (flag-set? flags ~multi-line?) '(#\newline) '())
+            '())
+        (go start '() '()))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; UTF-8 Utilities
+
+;; Here are some hairy optimizations that need to be documented
+;; better.  Thanks to these, we never do any utf8 processing once the
+;; regexp is compiled.
+
+;; two chars: ab..ef
+;;            a[b..xFF]|[b-d][x80..xFF]|e[x80..xFF]
+
+;; three chars: abc..ghi
+;;              ab[c..xFF]|a[d..xFF][x80..xFF]|
+;;              [b..f][x80..xFF][x80..xFF]|
+;;              g[x80..g][x80..xFF]|gh[x80..i]
+
+;; four chars: abcd..ghij
+;;             abc[d..xFF]|ab[d..xFF][x80..xFF]|a[c..xFF][x80..xFF][x80..xFF]|
+;;             [b..f][x80..xFF][x80..xFF][x80..xFF]|
+;;             g[x80..g][x80..xFF][x80..xFF]|gh[x80..h][x80..xFF]|ghi[x80..j]
+
+(define (high-char? c) (<= #x80 (char->integer c)))
+
+;; number of total bytes in a utf8 char given the 1st byte
+
+(define utf8-start-char->length
+  (let ((table '#(
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 0x
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 1x
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 2x
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 3x
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 4x
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 5x
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 6x
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 7x
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 8x
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 9x
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; ax
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; bx
+2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ; cx
+2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ; dx
+3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 ; ex
+4 4 4 4 4 4 4 4 5 5 5 5 6 6 0 0 ; fx
+)))
+    (lambda (c) (vector-ref table (char->integer c)))))
+
+(define (utf8-string-ref str i len)
+  (define (byte n) (char->integer (string-ref str n)))
+  (case len
+    ((1) ; shouldn't happen in this module
+     (string-ref str i))
+    ((2)
+     (integer->char
+      (+ (bit-shl (bit-and (byte i) #b00011111) 6)
+         (bit-and (byte (+ i 1)) #b00111111))))
+    ((3)
+     (integer->char
+      (+ (bit-shl (bit-and (byte i) #b00001111) 12)
+         (bit-shl (bit-and (byte (+ i 1)) #b00111111) 6)
+         (bit-and (byte (+ i 2)) #b00111111))))
+    ((4)
+     (integer->char
+      (+ (bit-shl (bit-and (byte i) #b00000111) 18)
+         (bit-shl (bit-and (byte (+ i 1)) #b00111111) 12)
+         (bit-shl (bit-and (byte (+ i 2)) #b00111111) 6)
+         (bit-and (byte (+ i 3)) #b00111111))))
+    (else
+     (%irregex-error "invalid utf8 length" str len i))))
+
+(define (utf8-backup-to-initial-char str i)
+  (let lp ((i i))
+    (if (= i 0)
+        0
+        (let ((c (char->integer (string-ref str i))))
+          (if (or (< c #x80) (>= c #xC0))
+              i
+              (lp (- i 1)))))))
+
+(define (utf8-lowest-digit-of-length len)
+  (case len
+    ((1) 0) ((2) #xC0) ((3) #xE0) ((4) #xF0)
+    (else (%irregex-error "invalid utf8 length" len))))
+
+(define (utf8-highest-digit-of-length len)
+  (case len
+    ((1) #x7F) ((2) #xDF) ((3) #xEF) ((4) #xF7)
+    (else (%irregex-error "invalid utf8 length" len))))
+
+(define (char->utf8-list c)
+  (let ((i (char->integer c)))
+    (cond
+     ((<= i #x7F) (list i))
+     ((<= i #x7FF)
+      (list (bit-ior #b11000000 (bit-shr i 6))
+            (bit-ior #b10000000 (bit-and i #b111111))))
+     ((<= i #xFFFF)
+      (list (bit-ior #b11100000 (bit-shr i 12))
+            (bit-ior #b10000000 (bit-and (bit-shr i 6) #b111111))
+            (bit-ior #b10000000 (bit-and i #b111111))))
+     ((<= i #x1FFFFF)
+      (list (bit-ior #b11110000 (bit-shr i 18))
+            (bit-ior #b10000000 (bit-and (bit-shr i 12) #b111111))
+            (bit-ior #b10000000 (bit-and (bit-shr i 6) #b111111))
+            (bit-ior #b10000000 (bit-and i #b111111))))
+     (else (%irregex-error "unicode codepoint out of range:" i)))))
+
+(define (unicode-range->utf8-pattern lo hi)
+  (let ((lo-ls (char->utf8-list lo))
+        (hi-ls (char->utf8-list hi)))
+    (if (not (= (length lo-ls) (length hi-ls)))
+        (sre-alternate (list (unicode-range-climb-digits lo-ls hi-ls)
+                             (unicode-range-up-to hi-ls)))
+        (let lp ((lo-ls lo-ls) (hi-ls hi-ls))
+          (cond
+           ((null? lo-ls)
+            '())
+           ((= (car lo-ls) (car hi-ls))
+            (sre-sequence
+             (list (integer->char (car lo-ls))
+                   (lp (cdr lo-ls) (cdr hi-ls)))))
+           ((= (+ (car lo-ls) 1) (car hi-ls))
+            (sre-alternate (list (unicode-range-up-from lo-ls)
+                                 (unicode-range-up-to hi-ls))))
+           (else
+            (sre-alternate (list (unicode-range-up-from lo-ls)
+                                 (unicode-range-middle lo-ls hi-ls)
+                                 (unicode-range-up-to hi-ls)))))))))
+
+(define (unicode-range-helper one ls prefix res)
+  (if (null? ls)
+      res
+      (unicode-range-helper
+       one
+       (cdr ls)
+       (cons (car ls) prefix)
+       (cons (sre-sequence
+              `(,@(map integer->char prefix)
+                ,(one (car ls))
+                ,@(map (lambda (_)
+                         `(/ ,(integer->char #x80)
+                             ,(integer->char #xFF)))
+                       (cdr ls))))
+             res))))
+
+(define (unicode-range-up-from lo-ls)
+  (sre-sequence
+   (list (integer->char (car lo-ls))
+         (sre-alternate
+          (unicode-range-helper
+           (lambda (c)
+             `(/ ,(integer->char (+ (car lo-ls) 1)) ,(integer->char #xFF)))
+           (cdr (reverse (cdr lo-ls)))
+           '()
+           (list
+            (sre-sequence
+             (append
+              (map integer->char (reverse (cdr (reverse (cdr lo-ls)))))
+              `((/ ,(integer->char (last lo-ls))
+                   ,(integer->char #xFF)))))))))))
+
+(define (unicode-range-up-to hi-ls)
+  (sre-sequence
+   (list (integer->char (car hi-ls))
+         (sre-alternate
+          (unicode-range-helper
+           (lambda (c)
+             `(/ ,(integer->char #x80) ,(integer->char (- (car hi-ls) 1))))
+           (cdr (reverse (cdr hi-ls)))
+           '()
+           (list
+            (sre-sequence
+             (append
+              (map integer->char (reverse (cdr (reverse (cdr hi-ls)))))
+              `((/ ,(integer->char #x80)
+                   ,(integer->char (last hi-ls))))))))))))
+
+(define (unicode-range-climb-digits lo-ls hi-ls)
+  (let ((lo-len (length lo-ls)))
+    (sre-alternate
+     (append
+      (list
+       (sre-sequence
+        (cons `(/ ,(integer->char (car lo-ls))
+                  ,(integer->char (if (<= (car lo-ls) #x7F) #x7F #xFF)))
+              (map (lambda (_)
+                     `(/ ,(integer->char #x80) ,(integer->char #xFF)))
+                   (cdr lo-ls)))))
+      (map
+       (lambda (i)
+         (sre-sequence
+          (cons
+           `(/ ,(integer->char (utf8-lowest-digit-of-length (+ i lo-len 1)))
+               ,(integer->char (utf8-highest-digit-of-length (+ i lo-len 1))))
+           (map (lambda (_)
+                  `(/ ,(integer->char #x80) ,(integer->char #xFF)))
+                (zero-to (+ i lo-len))))))
+       (zero-to (- (length hi-ls) lo-len 1)))
+      (list
+       (sre-sequence
+        (cons `(/ ,(integer->char
+                    (utf8-lowest-digit-of-length
+                     (utf8-start-char->length
+                      (integer->char (- (car hi-ls) 1)))))
+                  ,(integer->char (- (car hi-ls) 1)))
+              (map (lambda (_)
+                     `(/ ,(integer->char #x80) ,(integer->char #xFF)))
+                   (cdr hi-ls)))))))))
+
+(define (unicode-range-middle lo-ls hi-ls)
+  (let ((lo (integer->char (+ (car lo-ls) 1)))
+        (hi (integer->char (- (car hi-ls) 1))))
+    (sre-sequence
+     (cons (if (char=? lo hi) lo `(/ ,lo ,hi))
+           (map (lambda (_) `(/ ,(integer->char #x80) ,(integer->char #xFF)))
+                (cdr lo-ls))))))
+
+(define (cset->utf8-pattern cset)
+  (let lp ((ls cset) (alts '()) (lo-cset '()))
+    (cond
+     ((null? ls)
+      (sre-alternate (append (reverse alts)
+                             (if (null? lo-cset)
+                                 '()
+                                 (list (cons '/ (reverse lo-cset)))))))
+     ((char? (car ls))
+      (if (high-char? (car ls))
+          (lp (cdr ls) (cons (car ls) alts) lo-cset)
+          (lp (cdr ls) alts (cons (car ls) lo-cset))))
+     (else
+      (if (or (high-char? (caar ls))  (high-char? (cdar ls)))
+          (lp (cdr ls)
+              (cons (unicode-range->utf8-pattern (caar ls) (cdar ls)) alts)
+              lo-cset)
+          (lp (cdr ls) alts (cons (cdar ls) (cons (caar ls) lo-cset))))))))
+
+(define (sre-adjust-utf8 sre flags)
+  (let adjust ((sre sre)
+               (utf8? (flag-set? flags ~utf8?))
+               (ci? (flag-set? flags ~case-insensitive?)))
+    (define (rec sre) (adjust sre utf8? ci?))
+    (cond
+     ((pair? sre)
+      (case (car sre)
+        ((w/utf8) (adjust (sre-sequence (cdr sre)) #t ci?))
+        ((w/noutf8) (adjust (sre-sequence (cdr sre)) #f ci?))
+        ((w/case)
+         (cons (car sre) (map (lambda (s) (adjust s utf8? #f)) (cdr sre))))
+        ((w/nocase)
+         (cons (car sre) (map (lambda (s) (adjust s utf8? #t)) (cdr sre))))
+        ((/ ~ & -)
+         (if (not utf8?)
+             sre
+             (let ((cset (sre->cset sre ci?)))
+               (if (any (lambda (x)
+                          (if (pair? x)
+                              (or (high-char? (car x)) (high-char? (cdr x)))
+                              (high-char? x)))
+                        cset)
+                   (if ci?
+                       (list 'w/case (cset->utf8-pattern cset))
+                       (cset->utf8-pattern cset))
+                   sre))))
+        ((*)
+         (case (sre-sequence (cdr sre))
+           ;; special case optimization: .* w/utf8 == .* w/noutf8
+           ((any) '(* any))
+           ((nonl) '(* nonl))
+           (else (cons '* (map rec (cdr sre))))))
+        (else
+         (cons (car sre) (map rec (cdr sre))))))
+     (else
+      (case sre
+        ((any) 'utf8-any)
+        ((nonl) 'utf8-nonl)
+        (else
+         (if (and utf8? (char? sre) (high-char? sre))
+             (sre-sequence (map integer->char (char->utf8-list sre)))
+             sre)))))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Compilation
+
+(cond-expand
+  (building-chicken
+   (define-syntax cached
+     (syntax-rules ()
+       ((_ arg fail) (build-cache 5 arg fail)))))
+  (else
+   (define-syntax cached
+     (syntax-rules ()
+       ((_ arg fail) fail)))))
+
+(define (irregex x . o)
+  (cond ((irregex? x) x)
+	((null? o)
+	 (cached
+	  x
+	  (if (string? x)
+	      (string->irregex x)
+	      (sre->irregex x))))
+	(else
+	 (if (string? x)
+	     (apply string->irregex x o)
+	     (apply sre->irregex x o)))))
+
+(define (string->irregex str . o)
+  (apply sre->irregex (apply string->sre str o) o))
+
+(define (sre->irregex sre . o)
+  (let* ((pat-flags (symbol-list->flags o))
+         (sre (if *allow-utf8-mode?*
+                  (sre-adjust-utf8 sre pat-flags)
+                  sre))
+         (searcher? (sre-searcher? sre))
+         (sre-dfa (if searcher? (sre-remove-initial-bos sre) sre))
+         (dfa-limit (cond ((memq 'small o) 1) ((memq 'fast o) 50) (else 10)))
+         (dfa/search
+          (cond ((memq 'backtrack o) #f)
+                (searcher? #t)
+                ((sre->nfa `(seq (* any) ,sre-dfa) pat-flags)
+                 => (lambda (nfa)
+                      (nfa->dfa nfa (* dfa-limit (nfa-num-states nfa)))))
+                (else #f)))
+         (dfa (cond ((and dfa/search (sre->nfa sre-dfa pat-flags))
+                     => (lambda (nfa)
+                          (nfa->dfa nfa (* dfa-limit (nfa-num-states nfa)))))
+                    (else #f)))
+         (submatches (sre-count-submatches sre-dfa))
+         (extractor
+          (and dfa dfa/search (sre-match-extractor sre-dfa submatches)))
+         (names (sre-names sre-dfa 1 '()))
+         (lens (sre-length-ranges sre-dfa names))
+         (flags (flag-join
+                 (flag-join ~none (and searcher? ~searcher?))
+                 (and (sre-consumer? sre) ~consumer?))))
+    (cond
+     (dfa
+      (make-irregex dfa dfa/search extractor #f flags submatches lens names))
+     (else
+      (let ((f (sre->procedure sre pat-flags names)))
+        (make-irregex #f #f #f f flags submatches lens names))))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; SRE Analysis
+
+;; returns #t if the sre can ever be empty
+(define (sre-empty? sre)
+  (if (pair? sre)
+      (case (car sre)
+        ((* ? look-ahead look-behind neg-look-ahead neg-look-behind) #t)
+        ((**) (or (not (number? (cadr sre))) (zero? (cadr sre))))
+        ((or) (any sre-empty? (cdr sre)))
+        ((: seq $ submatch => submatch-named + atomic)
+         (every sre-empty? (cdr sre)))
+        (else #f))
+      (memq sre '(epsilon bos eos bol eol bow eow commit))))
+
+(define (sre-any? sre)
+  (or (eq? sre 'any)
+      (and (pair? sre)
+           (case (car sre)
+             ((seq : $ submatch => submatch-named)
+              (and (pair? (cdr sre)) (null? (cddr sre)) (sre-any? (cadr sre))))
+             ((or) (every sre-any? (cdr sre)))
+             (else #f)))))
+
+(define (sre-repeater? sre)
+  (and (pair? sre)
+       (or (memq (car sre) '(* +))
+           (and (memq (car sre) '($ submatch => submatch-named seq :))
+                (pair? (cdr sre))
+                (null? (cddr sre))
+                (sre-repeater? (cadr sre))))))
+
+(define (sre-searcher? sre)
+  (if (pair? sre)
+      (case (car sre)
+        ((* +) (sre-any? (sre-sequence (cdr sre))))
+        ((seq : $ submatch => submatch-named)
+         (and (pair? (cdr sre)) (sre-searcher? (cadr sre))))
+        ((or) (every sre-searcher? (cdr sre)))
+        (else #f))
+      (eq? 'bos sre)))
+
+(define (sre-consumer? sre)
+  (if (pair? sre)
+      (case (car sre)
+        ((* +) (sre-any? (sre-sequence (cdr sre))))
+        ((seq : $ submatch => submatch-named)
+         (and (pair? (cdr sre)) (sre-consumer? (last sre))))
+        ((or) (every sre-consumer? (cdr sre)))
+        (else #f))
+      (eq? 'eos sre)))
+
+(define (sre-has-submatches? sre)
+  (and (pair? sre)
+       (or (memq (car sre) '($ submatch => submatch-named))
+           (if (eq? 'posix-string (car sre))
+               (sre-has-submatches? (string->sre (cadr sre)))
+               (any sre-has-submatches? (cdr sre))))))
+
+(define (sre-count-submatches sre)
+  (let count ((sre sre) (sum 0))
+    (if (pair? sre)
+        (fold count
+              (+ sum (case (car sre)
+                       (($ submatch => submatch-named) 1)
+                       ((dsm) (+ (cadr sre) (caddr sre)))
+                       ((posix-string)
+                        (sre-count-submatches (string->sre (cadr sre))))
+                       (else 0)))
+              (cdr sre))
+        sum)))
+
+(define (sre-length-ranges sre . o)
+  (let ((names (if (pair? o) (car o) (sre-names sre 1 '())))
+        (sublens (make-vector (+ 1 (sre-count-submatches sre)) #f)))
+    (vector-set!
+     sublens
+     0
+     (let lp ((sre sre) (n 1) (lo 0) (hi 0) (return cons))
+       (define (grow i) (return (+ lo i) (and hi (+ hi i))))
+       (cond
+        ((pair? sre)
+         (if (string? (car sre))
+             (grow 1)
+             (case (car sre)
+               ((/ ~ & -)
+                (grow 1))
+               ((posix-string)
+                (lp (string->sre (cadr sre)) n lo hi return))
+               ((seq : w/case w/nocase atomic)
+                (let lp2 ((ls (cdr sre)) (n n) (lo2 0) (hi2 0))
+                  (if (null? ls)
+                      (return (+ lo lo2) (and hi hi2 (+ hi hi2)))
+                      (lp (car ls) n 0 0
+                          (lambda (lo3 hi3)
+                            (lp2 (cdr ls)
+                                 (+ n (sre-count-submatches (car ls)))
+                                 (+ lo2 lo3)
+                                 (and hi2 hi3 (+ hi2 hi3))))))))
+               ((or)
+                (let lp2 ((ls (cdr sre)) (n n) (lo2 #f) (hi2 0))
+                  (if (null? ls)
+                      (return (+ lo (or lo2 1)) (and hi hi2 (+ hi hi2)))
+                      (lp (car ls) n 0 0
+                          (lambda (lo3 hi3)
+                            (lp2 (cdr ls)
+                                 (+ n (sre-count-submatches (car ls)))
+                                 (if lo2 (min lo2 lo3) lo3)
+                                 (and hi2 hi3 (max hi2 hi3))))))))
+               ((if)
+                (cond
+                 ((or (null? (cdr sre)) (null? (cddr sre)))
+                  (return lo hi))
+                 (else
+                  (let ((n1 (sre-count-submatches (car sre)))
+                        (n2 (sre-count-submatches (cadr sre))))
+                    (lp (if (or (number? (cadr sre)) (symbol? (cadr sre)))
+                            'epsilon
+                            (cadr sre))
+                        n lo hi
+                        (lambda (lo2 hi2)
+                          (lp (caddr sre) (+ n n1) 0 0
+                              (lambda (lo3 hi3)
+                                (lp (if (pair? (cdddr sre))
+                                        (cadddr sre)
+                                        'epsilon)
+                                    (+ n n1 n2) 0 0
+                                    (lambda (lo4 hi4)
+                                      (return (+ lo2 (min lo3 lo4))
+                                              (and hi2 hi3 hi4
+                                                   (+ hi2 (max hi3 hi4))
+                                                   ))))))))))))
+               ((dsm)
+                (lp (sre-sequence (cdddr sre)) (+ n (cadr sre)) lo hi return))
+               (($ submatch => submatch-named)
+                (lp (sre-sequence
+                     (if (eq? 'submatch (car sre)) (cdr sre) (cddr sre)))
+                    (+ n 1) lo hi
+                    (lambda (lo2 hi2)
+                      (vector-set! sublens n (cons lo2 hi2))
+                      (return lo2 hi2))))
+               ((backref backref-ci)
+                (let ((n (cond
+                          ((number? (cadr sre)) (cadr sre))
+                          ((assq (cadr sre) names) => cdr)
+                          (else (%irregex-error "unknown backreference" (cadr sre))))))
+                  (cond
+                   ((or (not (integer? n))
+                        (not (< 0 n (vector-length sublens))))
+                    (%irregex-error 'sre-length "invalid backreference" sre))
+                   ((not (vector-ref sublens n))
+                    (%irregex-error 'sre-length "invalid forward backreference" sre))
+                   (else
+                    (let ((lo2 (car (vector-ref sublens n)))
+                          (hi2 (cdr (vector-ref sublens n))))
+                      (return (+ lo lo2) (and hi hi2 (+ hi hi2))))))))
+               ((* *?)
+                (lp (sre-sequence (cdr sre)) n lo hi (lambda (lo hi) #f))
+                (return lo #f))
+               ((** **?)
+                (cond
+                 ((or (and (number? (cadr sre))
+                           (number? (caddr sre))
+                           (> (cadr sre) (caddr sre)))
+                      (and (not (cadr sre)) (caddr sre)))
+                  (return lo hi))
+                 (else
+                  (if (caddr sre)
+                      (lp (sre-sequence (cdddr sre)) n 0 0
+                          (lambda (lo2 hi2)
+                            (return (+ lo (* (cadr sre) lo2))
+                                    (and hi hi2 (+ hi (* (caddr sre) hi2))))))
+                      (lp (sre-sequence (cdddr sre)) n 0 0
+                          (lambda (lo2 hi2)
+                            (return (+ lo (* (cadr sre) lo2)) #f)))))))
+               ((+)
+                (lp (sre-sequence (cdr sre)) n lo hi
+                    (lambda (lo2 hi2)
+                      (return (+ lo lo2) #f))))
+               ((? ??)
+                (lp (sre-sequence (cdr sre)) n lo hi
+                    (lambda (lo2 hi2)
+                      (return lo (and hi hi2 (+ hi hi2))))))
+               ((= =? >= >=?)
+                (lp `(** ,(cadr sre)
+                         ,(if (memq (car sre) '(>= >=?)) #f (cadr sre))
+                         ,@(cddr sre))
+                    n lo hi return))
+               ((look-ahead neg-look-ahead look-behind neg-look-behind)
+                (return lo hi))
+               (else
+                (cond
+                 ((assq (car sre) sre-named-definitions)
+                  => (lambda (cell)
+                       (lp (apply (cdr cell) (cdr sre)) n lo hi return)))
+                 (else
+                  (%irregex-error 'sre-length-ranges "unknown sre operator" sre)))))))
+        ((char? sre)
+         (grow 1))
+        ((string? sre)
+         (grow (string-length sre)))
+        ((memq sre '(any nonl))
+         (grow 1))
+        ((memq sre '(epsilon bos eos bol eol bow eow nwb commit))
+         (return lo hi))
+        (else
+         (let ((cell (assq sre sre-named-definitions)))
+           (if cell
+               (lp (if (procedure? (cdr cell)) ((cdr cell)) (cdr cell))
+                   n lo hi return)
+               (%irregex-error 'sre-length-ranges "unknown sre" sre)))))))
+    sublens))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; SRE Manipulation
+
+;; build a (seq ls ...) sre from a list
+(define (sre-sequence ls)
+  (cond
+   ((null? ls) 'epsilon)
+   ((null? (cdr ls)) (car ls))
+   (else (cons 'seq ls))))
+
+;; build a (or ls ...) sre from a list
+(define (sre-alternate ls)
+  (cond
+   ((null? ls) '(or))
+   ((null? (cdr ls)) (car ls))
+   (else (cons 'or ls))))
+
+;; returns an equivalent SRE without any match information
+(define (sre-strip-submatches sre)
+  (if (not (pair? sre))
+      sre
+      (case (car sre)
+        (($ submatch) (sre-strip-submatches (sre-sequence (cdr sre))))
+        ((=> submatch-named) (sre-strip-submatches (sre-sequence (cddr sre))))
+        ((dsm) (sre-strip-submatches (sre-sequence (cdddr sre))))
+        (else (map sre-strip-submatches sre)))))
+
+;; given a char-set list of chars and strings, flattens them into
+;; chars only
+(define (sre-flatten-ranges ls)
+  (let lp ((ls ls) (res '()))
+    (cond
+     ((null? ls)
+      (reverse res))
+     ((string? (car ls))
+      (lp (append (string->list (car ls)) (cdr ls)) res))
+     (else
+      (lp (cdr ls) (cons (car ls) res))))))
+
+(define (sre-names sre n names)
+  (if (not (pair? sre))
+      names
+      (case (car sre)
+        (($ submatch)
+         (sre-names (sre-sequence (cdr sre)) (+ n 1) names))
+        ((=> submatch-named)
+         (sre-names (sre-sequence (cddr sre))
+                    (+ n 1)
+                    (cons (cons (cadr sre) n) names)))
+        ((dsm)
+         (sre-names (sre-sequence (cdddr sre)) (+ n (cadr sre)) names))
+        ((seq : or * + ? *? ?? w/case w/nocase atomic
+          look-ahead look-behind neg-look-ahead neg-look-behind)
+         (sre-sequence-names (cdr sre) n names))
+        ((= >=)
+         (sre-sequence-names (cddr sre) n names))
+        ((** **?)
+         (sre-sequence-names (cdddr sre) n names))
+        (else
+         names))))
+
+(define (sre-sequence-names ls n names)
+  (if (null? ls)
+      names
+      (sre-sequence-names (cdr ls)
+                          (+ n (sre-count-submatches (car ls)))
+                          (sre-names (car ls) n names))))
+
+(define (sre-remove-initial-bos sre)
+  (cond
+   ((pair? sre)
+    (case (car sre)
+      ((seq : $ submatch => submatch-named * +)
+       (cond
+        ((not (pair? (cdr sre)))
+         sre)
+        ((eq? 'bos (cadr sre))
+         (cons (car sre) (cddr sre)))
+        (else
+         (cons (car sre)
+               (cons (sre-remove-initial-bos (cadr sre)) (cddr sre))))))
+      ((or)
+       (sre-alternate (map sre-remove-initial-bos (cdr sre))))
+      (else
+       sre)))
+   (else
+    sre)))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Basic Matching
+
+(define irregex-basic-string-chunker
+  (make-irregex-chunker (lambda (x) #f)
+                        car
+                        cadr
+                        caddr
+                        (lambda (src1 i src2 j)
+                          (substring (car src1) i j))))
+
+(define (irregex-search x str . o)
+  (if (not (string? str)) (%irregex-error 'irregex-search "not a string" str))
+  (let ((start (or (and (pair? o) (car o)) 0))
+        (end (or (and (pair? o) (pair? (cdr o)) (cadr o)) (string-length str))))
+    (if (not (integer? start)) (%irregex-error 'irregex-search "not an integer" start))
+    (if (not (integer? end)) (%irregex-error 'irregex-search "not an integer" end))
+    (irregex-search/chunked x
+                            irregex-basic-string-chunker
+                            (list str start end)
+                            start)))
+
+(define (irregex-search/chunked x cnk src . o)
+  (let* ((irx (irregex x))
+         (matches (irregex-new-matches irx))
+         (i (if (pair? o) (car o) ((chunker-get-start cnk) src))))
+    (if (not (integer? i)) (%irregex-error 'irregex-search "not an integer" i))
+    (irregex-match-chunker-set! matches cnk)
+    (irregex-search/matches irx cnk src i matches)))
+
+;; internal routine, can be used in loops to avoid reallocating the
+;; match vector
+(define (irregex-search/matches irx cnk src i matches)
+  (cond
+   ((irregex-dfa irx)
+    (cond
+     ((flag-set? (irregex-flags irx) ~searcher?)
+      (cond
+       ((dfa-match/longest (irregex-dfa irx) cnk src i #f #f matches 0)
+        (irregex-match-start-chunk-set! matches 0 src)
+        (irregex-match-start-index-set! matches 0 i)
+        ((irregex-dfa/extract irx)
+         cnk src i
+         (%irregex-match-end-chunk matches 0)
+         (%irregex-match-end-index matches 0)
+         matches)
+        matches)
+       (else
+        #f)))
+     ((dfa-match/shortest
+       (irregex-dfa/search irx) cnk src i matches 0)
+      (let ((dfa (irregex-dfa irx))
+            (get-start (chunker-get-start cnk))
+            (get-end (chunker-get-end cnk))
+            (get-next (chunker-get-next cnk)))
+        (let lp1 ((src src) (i i))
+          (let ((end (get-end src)))
+            (let lp2 ((i i))
+              (cond
+               ((dfa-match/longest dfa cnk src i #f #f matches 0)
+                (irregex-match-start-chunk-set! matches 0 src)
+                (irregex-match-start-index-set! matches 0 i)
+                ((irregex-dfa/extract irx)
+                 cnk src i
+                 (%irregex-match-end-chunk matches 0)
+                 (%irregex-match-end-index matches 0)
+                 matches)
+                matches)
+               ((>= i end)
+                (let ((next (get-next src)))
+                  (and next (lp1 next (get-start next)))))
+               (else
+                (lp2 (+ i 1)))))))))
+     (else
+      #f)))
+   (else
+    (let ((res (irregex-search/backtrack irx cnk src i matches)))
+      (if res (%irregex-match-fail-set! res #f))
+      res))))
+
+(define (irregex-search/backtrack irx cnk src i matches)
+  (let ((matcher (irregex-nfa irx))
+        (str ((chunker-get-str cnk) src))
+        (end ((chunker-get-end cnk) src))
+        (get-next (chunker-get-next cnk))
+        (init (cons src i)))
+    (if (flag-set? (irregex-flags irx) ~searcher?)
+        (matcher cnk init src str i end matches (lambda () #f))
+        (let lp ((src2 src)
+                 (str str)
+                 (i i)
+                 (end end))
+          (cond
+           ((matcher cnk init src2 str i end matches (lambda () #f))
+            (irregex-match-start-chunk-set! matches 0 src2)
+            (irregex-match-start-index-set! matches 0 i)
+            matches)
+           ((< i end)
+            (lp src2 str (+ i 1) end))
+           (else
+            (let ((src2 (get-next src2)))
+              (if src2
+                  (lp src2
+                      ((chunker-get-str cnk) src2)
+                      ((chunker-get-start cnk) src2)
+                      ((chunker-get-end cnk) src2))
+                  #f))))))))
+
+(define (irregex-match irx str . o)
+  (if (not (string? str)) (%irregex-error 'irregex-match "not a string" str))
+  (let ((start (or (and (pair? o) (car o)) 0))
+        (end (or (and (pair? o) (pair? (cdr o)) (cadr o)) (string-length str))))
+    (irregex-match/chunked irx
+                           irregex-basic-string-chunker
+                           (list str start end))))
+
+(define (irregex-match/chunked irx cnk src)
+  (let* ((irx (irregex irx))
+         (matches (irregex-new-matches irx)))
+    (irregex-match-chunker-set! matches cnk)
+    (cond
+     ((irregex-dfa irx)
+      (and
+       (dfa-match/longest
+        (irregex-dfa irx) cnk src ((chunker-get-start cnk) src) #f #f matches 0)
+       (= ((chunker-get-end cnk) (%irregex-match-end-chunk matches 0))
+          (%irregex-match-end-index matches 0))
+       (begin
+         (irregex-match-start-chunk-set! matches 0 src)
+         (irregex-match-start-index-set! matches
+                                         0
+                                         ((chunker-get-start cnk) src))
+         ((irregex-dfa/extract irx)
+          cnk src ((chunker-get-start cnk) src)
+          (%irregex-match-end-chunk matches 0)
+          (%irregex-match-end-index matches 0)
+          matches)
+         matches)))
+     (else
+      (let* ((matcher (irregex-nfa irx))
+             (str ((chunker-get-str cnk) src))
+             (i ((chunker-get-start cnk) src))
+             (end ((chunker-get-end cnk) src))
+             (init (cons src i)))
+        (let lp ((m (matcher cnk init src str i end matches (lambda () #f))))
+          (and m
+               (cond
+                ((and (not ((chunker-get-next cnk)
+                            (%irregex-match-end-chunk m 0)))
+                      (= ((chunker-get-end cnk)
+                          (%irregex-match-end-chunk m 0))
+                         (%irregex-match-end-index m 0)))
+                 (%irregex-match-fail-set! m #f)
+                 m)
+                ((%irregex-match-fail m)
+                 (lp ((%irregex-match-fail m))))
+                (else
+                 #f)))))))))
+
+(define (irregex-match? . args)
+  (and (apply irregex-match args) #t))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; DFA Matching
+
+;; inline these
+(define (dfa-init-state dfa)
+  (vector-ref dfa 0))
+(define (dfa-next-state dfa node)
+  (vector-ref dfa (cdr node)))
+(define (dfa-final-state? dfa state)
+  (car state))
+
+;; this searches for the first end index for which a match is possible
+(define (dfa-match/shortest dfa cnk src start matches index)
+  (let ((get-str (chunker-get-str cnk))
+        (get-start (chunker-get-start cnk))
+        (get-end (chunker-get-end cnk))
+        (get-next (chunker-get-next cnk)))
+    (let lp1 ((src src) (start start) (state (dfa-init-state dfa)))
+      (and
+       src
+       (let ((str (get-str src))
+             (end (get-end src)))
+         (let lp2 ((i start) (state state))
+           (cond
+            ((dfa-final-state? dfa state)
+             (cond
+              (index
+               (irregex-match-end-chunk-set! matches index src)
+               (irregex-match-end-index-set! matches index i)))
+             #t)
+            ((< i end)
+             (let* ((ch (string-ref str i))
+                    (next (find (lambda (x)
+                                  (if (eqv? ch (car x))
+                                      #t
+                                      (and (pair? (car x))
+                                           (char<=? (caar x) ch)
+                                           (char<=? ch (cdar x)))))
+                                (cdr state))))
+               (and next (lp2 (+ i 1) (dfa-next-state dfa next)))))
+            (else
+             (let ((next (get-next src)))
+               (and next (lp1 next (get-start next) state)))))))))))
+
+;; this finds the longest match starting at a given index
+(define (dfa-match/longest dfa cnk src start end-src end matches index)
+  (let ((get-str (chunker-get-str cnk))
+        (get-start (chunker-get-start cnk))
+        (get-end (chunker-get-end cnk))
+        (get-next (chunker-get-next cnk))
+        (start-is-final? (dfa-final-state? dfa (dfa-init-state dfa))))
+    (cond
+     (index
+      (irregex-match-end-chunk-set! matches index #f)
+      (irregex-match-end-index-set! matches index #f)))
+    (let lp1 ((src src)
+              (start start)
+              (state (dfa-init-state dfa))
+              (res-src (and start-is-final? src))
+              (res-index (and start-is-final? start)))
+      (let ((str (get-str src))
+            (end (if (eq? src end-src) end (get-end src))))
+        (let lp2 ((i start)
+                  (state state)
+                  (res-src res-src)
+                  (res-index res-index))
+          (cond
+           ((>= i end)
+            (cond
+             ((and index res-src)
+              (irregex-match-end-chunk-set! matches index res-src)
+              (irregex-match-end-index-set! matches index res-index)))
+            (let ((next (and (not (eq? src end-src)) (get-next src))))
+              (if next
+                  (lp1 next (get-start next) state res-src res-index)
+                  (and index
+                       (%irregex-match-end-chunk matches index)
+                       #t))))
+           (else
+            (let* ((ch (string-ref str i))
+                   (cell (find (lambda (x)
+                                 (if (eqv? ch (car x))
+                                     #t
+                                     (and (pair? (car x))
+                                          (char<=? (caar x) ch)
+                                          (char<=? ch (cdar x)))))
+                               (cdr state))))
+              (cond
+               (cell
+                (let ((next (dfa-next-state dfa cell)))
+                  (if (dfa-final-state? dfa next)
+                      (lp2 (+ i 1) next src (+ i 1))
+                      (lp2 (+ i 1) next res-src res-index))))
+               (res-src
+                (cond
+                 (index
+                  (irregex-match-end-chunk-set! matches index res-src)
+                  (irregex-match-end-index-set! matches index res-index)))
+                #t)
+               ((and index (%irregex-match-end-chunk matches index))
+                #t)
+               (else
+                #f))))))))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Named Definitions
+
+(define sre-named-definitions
+  `((any . ,*all-chars*)
+    (nonl . (- ,*all-chars* (,(string #\newline))))
+    (alphabetic . (/ #\a #\z #\A #\Z))
+    (alpha . alphabetic)
+    (alphanumeric . (/ #\a #\z #\A #\Z #\0 #\9))
+    (alphanum . alphanumeric)
+    (alnum . alphanumeric)
+    (lower-case . (/ #\a #\z))
+    (lower . lower-case)
+    (upper-case . (/ #\A #\Z))
+    (upper . upper-case)
+    (numeric . (/ #\0 #\9))
+    (num . numeric)
+    (digit . numeric)
+    (punctuation . (or #\! #\" #\# #\% #\& #\' #\( #\) #\* #\, #\- #\.
+                       #\/ #\: #\; #\? #\@ #\[ #\\ #\] #\_ #\{ #\}))
+    (punct . punctuation)
+    (graphic
+     . (or alphanumeric punctuation #\$ #\+ #\< #\= #\> #\^ #\` #\| #\~))
+    (graph . graphic)
+    (blank . (or #\space ,(integer->char (- (char->integer #\space) 23))))
+    (whitespace . (or blank #\newline))
+    (space . whitespace)
+    (white . whitespace)
+    (printing or graphic whitespace)
+    (print . printing)
+
+    ;; XXXX we assume a (possibly shifted) ASCII-based ordering
+    (control . (/ ,(integer->char (- (char->integer #\space) 32))
+                  ,(integer->char (- (char->integer #\space) 1))))
+    (cntrl . control)
+    (hex-digit . (or numeric (/ #\a #\f #\A #\F)))
+    (xdigit . hex-digit)
+    (ascii . (/ ,(integer->char (- (char->integer #\space) 32))
+                ,(integer->char (+ (char->integer #\space) 95))))
+    (ascii-nonl . (/ ,(integer->char (- (char->integer #\space) 32))
+                     ,(integer->char (- (char->integer #\newline) 1))
+                     ,(integer->char (+ (char->integer #\newline) 1))
+                     ,(integer->char (+ (char->integer #\space) 95))))
+    (newline . (or (seq ,(integer->char (+ (char->integer #\newline) 3))
+                        #\newline)
+                   (/ #\newline
+                      ,(integer->char (+ (char->integer #\newline) 3)))))
+
+    ;; ... it's really annoying to support old Scheme48
+    (word . (seq bow (+ (or alphanumeric #\_)) eow))
+    (utf8-tail-char . (/ ,(integer->char (+ (char->integer #\space) #x60))
+                         ,(integer->char (+ (char->integer #\space) #xA1))))
+    (utf8-2-char . (seq (/ ,(integer->char (+ (char->integer #\space) #xA2))
+                           ,(integer->char (+ (char->integer #\space) #xBF)))
+                        utf8-tail-char))
+    (utf8-3-char . (seq (/ ,(integer->char (+ (char->integer #\space) #xC0))
+                           ,(integer->char (+ (char->integer #\space) #xCF)))
+                        utf8-tail-char
+                        utf8-tail-char))
+    (utf8-4-char . (seq (/ ,(integer->char (+ (char->integer #\space) #xD0))
+                           ,(integer->char (+ (char->integer #\space) #xD7)))
+                        utf8-tail-char
+                        utf8-tail-char
+                        utf8-tail-char))
+    (utf8-any . (or ascii utf8-2-char utf8-3-char utf8-4-char))
+    (utf8-nonl . (or ascii-nonl utf8-2-char utf8-3-char utf8-4-char))
+
+    ;; extended library patterns
+    (integer . (seq (? (or #\+ #\-)) (+ numeric)))
+    (real . (seq (+ numeric) (? #\. (+ numeric)) (? (or #\e #\E) integer)))
+    ;; slightly more lax than R5RS, allow ->foo, etc.
+    (symbol-initial . (or alpha ("!$%&*/:<=>?^_~")))
+    (symbol-subsequent . (or symbol-initial digit ("+-.@")))
+    (symbol . (or (seq symbol-initial (* symbol-subsequent))
+                  (seq ("+-") (? symbol-initial (* symbol-subsequent)))
+                  (seq ".." (* "."))))
+    (sexp-space . (seq (* (* space) ";" (* nonl) newline) (+ space)))
+    (string . (seq #\" (escape #\\ #\") #\"))
+    (escape . ,(lambda (esc . o) `(* (or (~ ,esc ,@o) (seq ,esc any)))))
+
+    (ipv4-digit . (seq (? (/ "12")) (? numeric) numeric))
+    (ipv4-address . (seq ipv4-digit (= 3 #\. ipv4-digit)))
+    ;; XXXX lax, allows multiple double-colons or < 8 terms w/o a ::
+    (ipv6-address . (seq (** 0 4 hex-digit)
+                         (** 1 7 #\: (? #\:) (** 0 4 hex-digit))))
+    (ip-address . (or ipv4-address ipv6-address))
+    (domain-atom . (+ (or alphanumeric #\_ #\-)))
+    (domain . (seq domain-atom (+ #\. domain-atom)))
+    ;; XXXX now anything can be a top-level domain, but this is still handy
+    (top-level-domain . (w/nocase (or "arpa" "com" "gov" "mil" "net" "org"
+                                      "aero" "biz" "coop" "info" "museum"
+                                      "name" "pro" (= 2 alpha))))
+    (domain/common . (seq (+ domain-atom #\.) top-level-domain))
+    ;;(email-local-part . (seq (+ (or (~ #\") string))))
+    (email-local-part . (+ (or alphanumeric #\_ #\- #\. #\+)))
+    (email . (seq email-local-part #\@ domain))
+    (url-char . (or alnum #\_ #\- #\+ #\\ #\= #\~ #\. #\, #\& #\;
+                    (seq "%" hex-digit hex-digit)))
+    (url-final-char . (or alnum #\_ #\- #\+ #\\ #\= #\~ #\&
+                          (seq "%" hex-digit hex-digit)))
+    (http-url . (w/nocase
+                 "http" (? "s") "://"
+                 (or domain/common ipv4-address) ;; (seq "[" ipv6-address "]")
+                 (? ":" (+ numeric)) ;; port
+                 ;; path
+                 (? "/" (* url-char)
+                    (? "?" (* url-char))                      ;; query
+                    (? "#" (? (* url-char) url-final-char)) ;; fragment
+                    )))
+
+    ))
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; SRE->NFA compilation
+;;
+;; An NFA state is a numbered node with a list of patter->number
+;; transitions, where pattern is either a character, (lo . hi)
+;; character range, or epsilon (indicating an empty transition).
+;; There may be duplicate characters and overlapping ranges - since
+;; it's an NFA we process it by considering all possible transitions.
+
+(define *nfa-presize* 128)  ;; constant
+(define *nfa-num-fields* 4) ;; constant
+
+(define (nfa-num-states nfa) (quotient (vector-length nfa) *nfa-num-fields*))
+(define (nfa-start-state nfa) (- (nfa-num-states nfa) 1))
+
+(define (nfa-get-state-trans nfa i)
+  (vector-ref nfa (* i *nfa-num-fields*)))
+(define (nfa-set-state-trans! nfa i x)
+  (vector-set! nfa (* i *nfa-num-fields*) x))
+(define (nfa-push-state-trans! nfa i x)
+  (nfa-set-state-trans! nfa i (cons x (nfa-get-state-trans nfa i))))
+
+(define (nfa-get-epsilons nfa i)
+  (vector-ref nfa (+ (* i *nfa-num-fields*) 1)))
+(define (nfa-set-epsilons! nfa i x)
+  (vector-set! nfa (+ (* i *nfa-num-fields*) 1) x))
+(define (nfa-add-epsilon! nfa i x)
+  (let ((eps (nfa-get-epsilons nfa i)))
+    (if (not (memq x eps))
+        (nfa-set-epsilons! nfa i (cons x eps)))))
+
+(define (nfa-get-state-closure nfa i)
+  (vector-ref nfa (+ (* i *nfa-num-fields*) 2)))
+(define (nfa-set-state-closure! nfa i x)
+  (vector-set! nfa (+ (* i *nfa-num-fields*) 2) x))
+
+(define (nfa-get-closure nfa mst)
+  (cond ((assoc mst
+                (vector-ref nfa (+ (* (nfa-multi-state-hash nfa mst)
+                                      *nfa-num-fields*)
+                                   (- *nfa-num-fields* 1))))
+         => cdr)
+        (else #f)))
+(define (nfa-add-closure! nfa mst x)
+  (let ((i (+ (* (nfa-multi-state-hash nfa mst) *nfa-num-fields*)
+              (- *nfa-num-fields* 1))))
+    (vector-set! nfa i (cons (cons mst x) (vector-ref nfa i)))))
+
+;; Compile and return the vector of NFA states (in groups of
+;; *nfa-num-fields* packed elements).  The start state will be the
+;; last element(s) of the vector, and all remaining states will be in
+;; descending numeric order, with state 0 being the unique accepting
+;; state.
+(define (sre->nfa sre init-flags)
+  (let ((buf (make-vector (* *nfa-presize* *nfa-num-fields*) '())))
+    ;; we loop over an implicit sequence list
+    (define (lp ls n flags next)
+      (define (new-state-number state)
+        (max n (+ 1 state)))
+      (define (add-state! n2 trans-ls)
+        (if (>= (* n2 *nfa-num-fields*) (vector-length buf))
+            (let ((tmp (make-vector (* 2 (vector-length buf)) '())))
+              (do ((i (- (vector-length buf) 1) (- i 1)))
+                  ((< i 0))
+                (vector-set! tmp i (vector-ref buf i)))
+              (set! buf tmp)))
+        (nfa-set-state-trans! buf n2 trans-ls)
+        n2)
+      (define (extend-state! next . trans)
+        (and next
+             (add-state! (new-state-number next)
+                         (map (lambda (x) (cons x next)) trans))))
+      (define (add-char-state! next ch)
+        (let ((alt (char-altcase ch)))
+          (if (and (flag-set? flags ~case-insensitive?) (not (eqv? ch alt)))
+              (extend-state! next ch alt)
+              (extend-state! next ch))))
+      (if (null? ls)
+          next
+          (cond
+           ((or (eq? 'epsilon (car ls)) (equal? "" (car ls)))
+            ;; chars and epsilons go directly into the transition table
+            (let ((next (lp (cdr ls) n flags next)))
+              (and next
+                   (let ((new (add-state! (new-state-number next) '())))
+                     (nfa-add-epsilon! buf new next)
+                     new))))
+           ((string? (car ls))
+            ;; process literal strings a char at a time
+            (let ((next (lp (cdr ls) n flags next)))
+              (and next
+                   (let lp2 ((i (- (string-length (car ls)) 1))
+                             (next next))
+                     (if (< i 0)
+                         next
+                         (lp2 (- i 1)
+                              (add-char-state! next (string-ref (car ls) i))))
+                     ))))
+           ((char? (car ls))
+            (add-char-state! (lp (cdr ls) n flags next) (car ls)))
+           ((symbol? (car ls))
+            (let ((cell (assq (car ls) sre-named-definitions)))
+              (and cell
+                   (lp (cons (if (procedure? (cdr cell))
+                                 ((cdr cell))
+                                 (cdr cell))
+                             (cdr ls))
+                       n
+                       flags
+                       next))))
+           ((pair? (car ls))
+            (cond
+             ((string? (caar ls))
+              ;; enumerated character set
+              (lp (cons (sre-alternate (string->list (caar ls))) (cdr ls))
+                  n
+                  flags
+                  next))
+             (else
+              (case (caar ls)
+                ((seq :)
+                 ;; for an explicit sequence, just append to the list
+                 (lp (append (cdar ls) (cdr ls)) n flags next))
+                ((w/case w/nocase w/utf8 w/noutf8)
+                 (let* ((next (lp (cdr ls) n flags next))
+                        (flags ((if (memq (caar ls) '(w/case w/utf8))
+                                    flag-clear
+                                    flag-join)
+                                flags
+                                (if (memq (caar ls) '(w/case w/nocase))
+                                    ~case-insensitive?
+                                    ~utf8?))))
+                   (and next
+                        (lp (cdar ls) (new-state-number next) flags next))))
+                ((/ - & ~) 
+                 (let ((ranges
+                        (sre->cset (car ls)
+                                   (flag-set? flags ~case-insensitive?))))
+                   (case (length ranges)
+                     ((1)
+                      (extend-state! (lp (cdr ls) n flags next) (car ranges)))
+                     (else
+                      (let ((next (lp (cdr ls) n flags next)))
+                        (and
+                         next
+                         (lp (list (sre-alternate
+                                    (map (lambda (x) (if (pair? x)
+                                                     (list '/ (car x) (cdr x))
+                                                     x))
+                                         ranges)))
+                             (new-state-number next)
+                             (flag-clear flags ~case-insensitive?)
+                             next)))))))
+                ((or)
+                 (let ((next (lp (cdr ls) n flags next)))
+                   (and
+                    next
+                    (if (null? (cdar ls))
+                        ;; empty (or) always fails
+                        (add-state! (new-state-number next) '())
+                        ;; compile both branches and insert epsilon
+                        ;; transitions to either
+                        (let* ((b (lp (list (sre-alternate (cddar ls)))
+                                      (new-state-number next)
+                                      flags
+                                      next))
+                               (a (and b
+                                       (lp (list (cadar ls))
+                                           (new-state-number (max b next))
+                                           flags
+                                           next))))
+                          (and a
+                               (let ((c (add-state! (new-state-number a) '())))
+                                 (nfa-add-epsilon! buf c a)
+                                 (nfa-add-epsilon! buf c b)
+                                 c)))))))
+                ((?)
+                 (let ((next (lp (cdr ls) n flags next)))
+                   ;; insert an epsilon transition directly to next
+                   (and
+                    next
+                    (let ((a (lp (cdar ls) (new-state-number next) flags next)))
+                      (if a
+                          (nfa-add-epsilon! buf a next))
+                      a))))
+                ((+ *)
+                 (let ((next (lp (cdr ls) n flags next)))
+                   (and
+                    next
+                    (let* ((new (lp '(epsilon)
+                                    (new-state-number next)
+                                    flags
+                                    next))
+                           (a (lp (cdar ls) (new-state-number new) flags new)))
+                      (cond
+                       (a
+                        ;; for *, insert an epsilon transition as in ? above
+                        (if (eq? '* (caar ls))
+                            (nfa-add-epsilon! buf a new))
+                        ;; for both, insert a loop back to self
+                        (nfa-add-epsilon! buf new a)))
+                      a))))
+                ;; need to add these to the match extractor first,
+                ;; but they tend to generate large DFAs
+                ;;((=)
+                ;; (lp (append (vector->list
+                ;;              (make-vector (cadar ls)
+                ;;                           (sre-sequence (cddar ls))))
+                ;;             (cdr ls))
+                ;;     n flags next))
+                ;;((>=)
+                ;; (lp (append (vector->list
+                ;;              (make-vector (- (cadar ls) 1)
+                ;;                           (sre-sequence (cddar ls))))
+                ;;             (cons `(+ ,@(cddar ls)) (cdr ls)))
+                ;;     n flags next))
+                ;;((**)
+                ;; (lp (append (vector->list
+                ;;              (make-vector (cadar ls)
+                ;;                           (sre-sequence (cdddar ls))))
+                ;;             (map
+                ;;              (lambda (x) `(? ,x))
+                ;;              (vector->list
+                ;;               (make-vector (- (caddar ls) (cadar ls))
+                ;;                            (sre-sequence (cdddar ls)))))
+                ;;             (cdr ls))
+                ;;     n flags next))
+                 ;; ignore submatches altogether
+                (($ submatch)
+                 (lp (cons (sre-sequence (cdar ls)) (cdr ls)) n flags next))
+                ((=> submatch-named)
+                 (lp (cons (sre-sequence (cddar ls)) (cdr ls)) n flags next))
+                (else
+                 (cond
+                  ((assq (caar ls) sre-named-definitions)
+                   => (lambda (cell)
+                        (if (procedure? (cdr cell))
+                            (lp (cons (apply (cdr cell) (cdar ls)) (cdr ls))
+                                n flags next)
+                            (%irregex-error "non-procedure in op position" (caar ls)))))
+                  (else #f)))))))
+           (else
+            #f))))
+    (let ((len (lp (list sre) 1 init-flags 0)))
+      (and len
+           (let ((nfa (make-vector (* *nfa-num-fields* (+ len 1)))))
+             (do ((i (- (vector-length nfa) 1) (- i 1)))
+                 ((< i 0))
+               (vector-set! nfa i (vector-ref buf i)))
+             nfa)))))
+
+;; We don't really want to use this, we use the closure compilation
+;; below instead, but this is included for reference and testing the
+;; sre->nfa conversion.
+
+;; (define (nfa-match nfa str)
+;;   (let lp ((ls (string->list str)) (state (car nfa)) (epsilons '()))
+;;     (if (null? ls)
+;;         (zero? (car state))
+;;         (any (lambda (m)
+;;                (if (eq? 'epsilon (car m))
+;;                    (and (not (memv (cdr m) epsilons))
+;;                         (lp ls (assv (cdr m) nfa) (cons (cdr m) epsilons)))
+;;                    (and (or (eqv? (car m) (car ls))
+;;                             (and (pair? (car m))
+;;                                  (char<=? (caar m) (car ls))
+;;                                  (char<=? (car ls) (cdar m))))
+;;                         (lp (cdr ls) (assv (cdr m) nfa) '()))))
+;;              (cdr state)))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; NFA multi-state representation
+
+;; Cache closures in a simple hash-table keyed on the smallest state
+;; (define (nfa-multi-state-hash nfa mst)
+;;   (car mst))
+
+;; Original sorted list-based representation
+
+;; (define (make-nfa-multi-state nfa)
+;;   '())
+
+;; (define (nfa-state->multi-state nfa state)
+;;   (list state))
+
+;; (define (nfa-multi-state-copy mst)
+;;   (map (lambda (x) x) mst))
+
+;; (define (list->nfa-multi-state nfa ls)
+;;   (nfa-multi-state-copy ls))
+
+;; (define (nfa-multi-state-contains? mst i)
+;;   (memq i mst))
+
+;; (define (nfa-multi-state-fold mst kons knil)
+;;   (fold kons knil mst))
+
+;; (define (nfa-multi-state-add! mst i)
+;;   (insert-sorted i mst))
+
+;; (define (nfa-multi-state-add mst i)
+;;   (insert-sorted i mst))
+
+;; (define (nfa-multi-state-union a b)
+;;   (merge-sorted a b))
+
+;; Sorted List Utilities
+
+;; (define (insert-sorted n ls)
+;;   (cond
+;;    ((null? ls)
+;;     (cons n '()))
+;;    ((<= n (car ls))
+;;     (if (= n (car ls))
+;;         ls
+;;         (cons n ls)))
+;;    (else
+;;     (cons (car ls) (insert-sorted n (cdr ls))))))
+
+;; (define (insert-sorted! n ls)
+;;   (cond
+;;    ((null? ls)
+;;     (cons n '()))
+;;    ((<= n (car ls))
+;;     (if (= n (car ls))
+;;         ls
+;;         (cons n ls)))
+;;    (else
+;;     (let lp ((head ls) (tail (cdr ls)))
+;;       (cond ((or (null? tail) (< n (car tail)))
+;;              (set-cdr! head (cons n tail)))
+;;             ((> n (car tail))
+;;              (lp tail (cdr tail)))))
+;;     ls)))
+
+;; (define (merge-sorted a b)
+;;   (cond ((null? a) b)
+;;         ((null? b) a)
+;;         ((< (car a) (car b))
+;;          (cons (car a) (merge-sorted (cdr a) b)))
+;;         ((> (car a) (car b))
+;;          (cons (car b) (merge-sorted a (cdr b))))
+;;         (else (merge-sorted (cdr a) b))))
+
+;; ========================================================= ;;
+
+;; Presized bit-vector based
+
+(define (nfa-multi-state-hash nfa mst)
+  (modulo (vector-ref mst 0) (nfa-num-states nfa)))
+
+(define (make-nfa-multi-state nfa)
+  (make-vector (quotient (+ (nfa-num-states nfa) 24 -1) 24) 0))
+
+(define (nfa-state->multi-state nfa state)
+  (nfa-multi-state-add! (make-nfa-multi-state nfa) state))
+
+(define (nfa-multi-state-copy mst)
+  (let ((res (make-vector (vector-length mst))))
+    (do ((i (- (vector-length mst) 1) (- i 1)))
+        ((< i 0) res)
+      (vector-set! res i (vector-ref mst i)))))
+
+(define (nfa-multi-state-contains? mst i)
+  (let ((cell (quotient i 24))
+        (bit (remainder i 24)))
+    (not (zero? (bit-and (vector-ref mst cell) (bit-shl 1 bit))))))
+
+(define (nfa-multi-state-add! mst i)
+  (let ((cell (quotient i 24))
+        (bit (remainder i 24)))
+    (vector-set! mst cell (bit-ior (vector-ref mst cell) (bit-shl 1 bit)))
+    mst))
+
+(define (nfa-multi-state-add mst i)
+  (nfa-multi-state-add! (nfa-multi-state-copy mst) i))
+
+(define (nfa-multi-state-union! a b)
+  (do ((i (- (vector-length a) 1) (- i 1)))
+      ((< i 0) a)
+    (vector-set! a i (bit-ior (vector-ref a i) (vector-ref b i)))))
+
+(define (nfa-multi-state-union a b)
+  (nfa-multi-state-union! (nfa-multi-state-copy a) b))
+
+(define (nfa-multi-state-fold mst kons knil)
+  (let ((limit (vector-length mst)))
+    (let lp1 ((i 0)
+              (acc knil))
+      (if (>= i limit)
+          acc
+          (let lp2 ((n (vector-ref mst i))
+                    (acc acc))
+            (if (zero? n)
+                (lp1 (+ i 1) acc)
+                (let* ((n2 (bit-and n (- n 1)))
+                       (n-tail (- n n2))
+                       (bit (+ (* i 24) (integer-log n-tail))))
+                  (lp2 n2 (kons bit acc)))))))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; NFA->DFA compilation
+;;
+;; During processing, the DFA is a list of the form:
+;;
+;;   ((NFA-states ...) accepting-state? transitions ...)
+;;
+;; where the transitions are as in the NFA, except there are no
+;; epsilons, duplicate characters or overlapping char-set ranges, and
+;; the states moved to are closures (sets of NFA states).  Multiple
+;; DFA states may be accepting states.
+
+(define (nfa->dfa nfa . o)
+  (let ((max-states (and (pair? o) (car o))))
+    (let lp ((ls (list (nfa-cache-state-closure! nfa (nfa-start-state nfa))))
+             (i 0)
+             (res '()))
+      (cond
+       ((null? ls)
+        (dfa-renumber nfa (reverse res)))
+       ((assoc (car ls) res) ;; already seen this combination of states
+        (lp (cdr ls) i res))
+       ((and max-states (> i max-states)) ;; too many DFA states
+        #f)
+       (else
+        (let* ((states (car ls))
+               (trans (nfa-state-transitions nfa states))
+               (accept? (and (nfa-multi-state-contains? states 0) #t)))
+          (lp (append (map cdr trans) (cdr ls))
+              (+ i 1)
+              `((,states ,accept? ,@trans) ,@res))))))))
+
+;; When the conversion is complete we renumber the DFA sets-of-states
+;; in order and convert the result to a vector for fast lookup.
+(define (dfa-renumber nfa dfa)
+  (let* ((len (length dfa))
+         (states (make-vector (nfa-num-states nfa) '()))
+         (res (make-vector len)))
+    (define (renumber mst)
+      (cdr (assoc mst (vector-ref states (nfa-multi-state-hash nfa mst)))))
+    (let lp ((ls dfa) (i 0))
+      (cond ((pair? ls)
+             (let ((j (nfa-multi-state-hash nfa (caar ls))))
+               (vector-set! states j (cons (cons (caar ls) i)
+                                           (vector-ref states j))))
+             (lp (cdr ls) (+ i 1)))))
+    (let lp ((ls dfa) (i 0))
+      (cond ((pair? ls)
+             (for-each
+              (lambda (x) (set-cdr! x (renumber (cdr x))))
+              (cddar ls))
+             (vector-set! res i (cdar ls))
+             (lp (cdr ls) (+ i 1)))))
+    res))
+
+;; Extract all distinct characters or ranges and the potential states
+;; they can transition to from a given set of states.  Any ranges that
+;; would overlap with distinct characters are split accordingly.
+(define (nfa-state-transitions nfa states)
+  (let ((res (nfa-multi-state-fold
+              states
+              (lambda (st res)
+                (fold (lambda (trans res)
+                        (nfa-join-transitions! nfa res (car trans) (cdr trans)))
+                      res
+                      (nfa-get-state-trans nfa st)))
+              '())))
+    (for-each (lambda (x) (set-cdr! x (nfa-closure nfa (cdr x)))) res)
+    res))
+
+(define (nfa-join-transitions! nfa existing elt state)
+  (define (join! ls elt state)
+    (if (not elt)
+        ls
+        (nfa-join-transitions! nfa ls elt state)))
+  (cond
+   ((char? elt)
+    (let lp ((ls existing) (res '()))
+      (cond
+       ((null? ls)
+        ;; done, just cons this on to the original list
+        (cons (cons elt (nfa-state->multi-state nfa state)) existing))
+       ((eq? elt (caar ls))
+        ;; add a new state to an existing char
+        (set-cdr! (car ls) (nfa-multi-state-add! (cdar ls) state))
+        existing)
+       ((and (pair? (caar ls))
+             (char<=? (caaar ls) elt)
+             (char<=? elt (cdaar ls)))
+        ;; split a range
+        (apply
+         (lambda (left right)
+           (let ((left-copy (nfa-multi-state-copy (cdar ls)))
+                 (right-copy (nfa-multi-state-copy (cdar ls))))
+             (cons (cons elt (nfa-multi-state-add! (cdar ls) state))
+                   (append (if left (list (cons left left-copy)) '())
+                           (if right (list (cons right right-copy)) '())
+                           res
+                           (cdr ls)))))
+         (split-char-range (caar ls) elt)))
+       (else
+        ;; keep looking
+        (lp (cdr ls) (cons (car ls) res))))))
+   (else
+    (let ((lo (car elt))
+          (hi (cdr elt)))
+      (let lp ((ls existing) (res '()))
+        (cond
+         ((null? ls)
+          ;; done, just cons this on to the original list
+          (cons (cons elt (nfa-state->multi-state nfa state)) existing))
+         ((and (char? (caar ls)) (char<=? lo (caar ls)) (char<=? (caar ls) hi))
+          ;; range enclosing a character
+          (apply
+           (lambda (left right)
+             (set-cdr! (car ls) (nfa-multi-state-add! (cdar ls) state))
+             (join! (join! existing left state) right state))
+           (split-char-range elt (caar ls))))
+         ((and (pair? (caar ls))
+               (or (and (char<=? (caaar ls) hi) (char<=? lo (cdaar ls)))
+                   (and (char<=? hi (caaar ls)) (char<=? (cdaar ls) lo))))
+          ;; overlapping ranges
+          (apply
+           (lambda (left1 left2 same right1 right2) ;; 5 regions
+             (let ((right1-copy (nfa-multi-state-copy (cdar ls)))
+                   (right2-copy (nfa-multi-state-copy (cdar ls))))
+               (set-car! (car ls) same)
+               (set-cdr! (car ls) (nfa-multi-state-add! (cdar ls) state))
+               (let* ((res (if right1
+                               (cons (cons right1 right1-copy) existing)
+                               existing))
+                      (res (if right2
+                               (cons (cons right2 right2-copy) res)
+                               res)))
+                 (join! (join! res left1 state) left2 state))))
+           (intersect-char-ranges elt (caar ls))))
+         (else
+          (lp (cdr ls) (cons (car ls) res)))))))))
+
+(define (char-range c1 c2)
+  (if (eqv? c1 c2) c1 (cons c1 c2)))
+
+;; assumes ch is included in the range
+(define (split-char-range range ch)
+  (list
+   (and (not (eqv? ch (car range)))
+        (char-range (car range) (integer->char (- (char->integer ch) 1))))
+   (and (not (eqv? ch (cdr range)))
+        (char-range (integer->char (+ (char->integer ch) 1)) (cdr range)))))
+
+;; returns 5 (possibly #f) char ranges:
+;;    a-only-1  a-only-2  a-and-b  b-only-1  b-only-2
+(define (intersect-char-ranges a b)
+  (if (char>? (car a) (car b))
+      (reverse (intersect-char-ranges b a))
+      (let ((a-lo (car a))
+            (a-hi (cdr a))
+            (b-lo (car b))
+            (b-hi (cdr b)))
+        (list
+         (and (char<? a-lo b-lo)
+              (char-range a-lo (integer->char (- (char->integer b-lo) 1))))
+         (and (char>? a-hi b-hi)
+              (char-range (integer->char (+ (char->integer b-hi) 1)) a-hi))
+         (char-range b-lo (if (char<? b-hi a-hi) b-hi a-hi))
+         #f
+         (and (char>? b-hi a-hi)
+              (char-range (integer->char (+ (char->integer a-hi) 1)) b-hi))))))
+
+(define (nfa-cache-state-closure! nfa state)
+  (let ((cached (nfa-get-state-closure nfa state)))
+    (cond
+     ((not (null? cached))
+      cached)
+     (else
+      (let ((res (nfa-state-closure-internal nfa state)))
+        (nfa-set-state-closure! nfa state res)
+        res)))))
+
+;; The `closure' of a list of NFA states - all states that can be
+;; reached from any of them using any number of epsilon transitions.
+(define (nfa-state-closure-internal nfa state)
+  (let lp ((ls (list state))
+           (res (make-nfa-multi-state nfa)))
+    (cond
+     ((null? ls)
+      res)
+     ((nfa-multi-state-contains? res (car ls))
+      (lp (cdr ls) res))
+     (else
+      (lp (append (nfa-get-epsilons nfa (car ls)) (cdr ls))
+          (nfa-multi-state-add! res (car ls)))))))
+
+(define (nfa-closure-internal nfa states)
+  (nfa-multi-state-fold
+   states
+   (lambda (st res)
+     (nfa-multi-state-union! res (nfa-cache-state-closure! nfa st)))
+   (make-nfa-multi-state nfa)))
+
+(define (nfa-closure nfa states)
+  (or (nfa-get-closure nfa states)
+      (let ((res (nfa-closure-internal nfa states)))
+        (nfa-add-closure! nfa states res)
+        res)))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Match Extraction
+;;
+;; DFAs don't give us match information, so once we match and
+;; determine the start and end, we need to recursively break the
+;; problem into smaller DFAs to get each submatch.
+;;
+;; See http://compilers.iecc.com/comparch/article/07-10-026
+
+(define (match-vector-ref v i) (vector-ref v (+ 3 i)))
+
+(define (match-vector-set! v i x) (vector-set! v (+ 3 i) x))
+
+(define (sre-match-extractor sre num-submatches)
+  (let* ((tmp (+ num-submatches 1))
+         (tmp-end-src-offset (+ 2 (* tmp 4)))
+         (tmp-end-index-offset (+ 3 (* tmp 4))))
+    (let lp ((sre sre) (n 1) (submatch-deps? #f))
+      (cond
+       ((not (sre-has-submatches? sre))
+        (if (not submatch-deps?)
+            (lambda (cnk start i end j matches) #t)
+            (let ((dfa (nfa->dfa (sre->nfa sre ~none))))
+              (lambda (cnk start i end j matches)
+                (dfa-match/longest dfa cnk start i end j matches tmp)))))
+       ((pair? sre)
+        (case (car sre)
+          ((: seq)
+           (let* ((right (sre-sequence (cddr sre)))
+                  (match-left (lp (cadr sre) n #t))
+                  (match-right
+                   (lp right (+ n (sre-count-submatches (cadr sre))) #t)))
+             (lambda (cnk start i end j matches)
+               (let lp1 ((end2 end) (j2 j) (best-src #f) (best-index #f))
+                 (let ((limit (if (eq? start end2)
+                                  i
+                                  ((chunker-get-start cnk) end2))))
+                   (let lp2 ((k j2) (best-src best-src) (best-index best-index))
+                     (if (< k limit)
+                         (cond
+                          ((not (eq? start end2))
+                           (let ((prev (chunker-prev-chunk cnk start end2)))
+                             (lp1 prev
+                                  ((chunker-get-end cnk) prev)
+                                  best-src
+                                  best-index)))
+                          (best-src
+                           (match-vector-set! matches tmp-end-src-offset best-src)
+                           (match-vector-set! matches tmp-end-index-offset best-index)
+                           #t)
+                          (else
+                           #f))
+                         (if (and (match-left cnk start i end2 k matches)
+                                  (eq? end2 (match-vector-ref matches
+                                                        tmp-end-src-offset))
+                                  (eqv? k (match-vector-ref matches
+                                                      tmp-end-index-offset))
+                                  (match-right cnk end2 k end j matches))
+                             (let ((right-src
+                                    (match-vector-ref matches tmp-end-src-offset))
+                                   (right
+                                    (match-vector-ref matches tmp-end-index-offset)))
+                               (cond
+                                ((and (eq? end right-src) (eqv? j right))
+                                 (match-vector-set! matches tmp-end-src-offset end)
+                                 (match-vector-set! matches tmp-end-index-offset j)
+                                 #t)
+                                ((or (not best-src)
+                                     (if (eq? best-src right-src)
+                                         (> right best-index)
+                                         (chunk-before? cnk
+                                                        best-src
+                                                        right-src)))
+                                 (lp2 (- k 1) right-src right))
+                                (else
+                                 (lp2 (- k 1) best-src best-index))))
+                             (lp2 (- k 1) best-src best-index)))))))))
+          ((or)
+           (if (null? (cdr sre))
+               (lambda (cnk start i end j matches) #f)
+               (let* ((rest (sre-alternate (cddr sre)))
+                      (match-first
+                       (lp (cadr sre) n #t))
+                      (match-rest
+                       (lp rest
+                           (+ n (sre-count-submatches (cadr sre)))
+                           submatch-deps?)))
+                 (lambda (cnk start i end j matches)
+                   (or (and (match-first cnk start i end j matches)
+                            (eq? end (match-vector-ref matches tmp-end-src-offset))
+                            (eqv? j (match-vector-ref matches tmp-end-index-offset)))
+                       (match-rest cnk start i end j matches))))))
+          ((* +)
+           (letrec ((match-once
+                     (lp (sre-sequence (cdr sre)) n #t))
+                    (match-all
+                     (lambda (cnk start i end j matches)
+                       (if (match-once cnk start i end j matches)
+                           (let ((src (match-vector-ref matches tmp-end-src-offset))
+                                 (k (match-vector-ref matches tmp-end-index-offset)))
+                             (if (and src (or (not (eq? start src)) (< i k)))
+                                 (match-all cnk src k end j matches)
+                                 #t))
+                           (begin
+                             (match-vector-set! matches tmp-end-src-offset start)
+                             (match-vector-set! matches tmp-end-index-offset i)
+                             #t)))))
+             (if (eq? '* (car sre))
+                 match-all
+                 (lambda (cnk start i end j matches)
+                   (and (match-once cnk start i end j matches)
+                        (let ((src (match-vector-ref matches tmp-end-src-offset))
+                              (k (match-vector-ref matches tmp-end-index-offset)))
+                          (match-all cnk src k end j matches)))))))
+          ((?)
+           (let ((match-once (lp (sre-sequence (cdr sre)) n #t)))
+             (lambda (cnk start i end j matches)
+               (cond
+		((match-once cnk start i end j matches)
+		 #t)
+		(else
+		 (match-vector-set! matches tmp-end-src-offset start)
+		 (match-vector-set! matches tmp-end-index-offset i)
+		 #t)))))
+          (($ submatch => submatch-named)
+           (let ((match-one
+                  (lp (sre-sequence (if (memq (car sre) '($ submatch))
+                                        (cdr sre)
+                                        (cddr sre)))
+                      (+ n 1)
+                      #t))
+                 (start-src-offset (* n 4))
+                 (start-index-offset (+ 1 (* n 4)))
+                 (end-src-offset (+ 2 (* n 4)))
+                 (end-index-offset (+ 3 (* n 4))))
+             (lambda (cnk start i end j matches)
+               (cond
+                ((match-one cnk start i end j matches)
+                 (match-vector-set! matches start-src-offset start)
+                 (match-vector-set! matches start-index-offset i)
+                 (match-vector-set! matches end-src-offset
+                              (match-vector-ref matches tmp-end-src-offset))
+                 (match-vector-set! matches end-index-offset
+                              (match-vector-ref matches tmp-end-index-offset))
+                 #t)
+                (else
+                 #f)))))
+          (else
+           (%irregex-error "unknown regexp operator" (car sre)))))
+       (else
+        (%irregex-error "unknown regexp" sre))))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Closure Compilation
+;;
+;; We use this for non-regular expressions instead of an interpreted
+;; NFA matcher.  We use backtracking anyway, but this gives us more
+;; freedom of implementation, allowing us to support patterns that
+;; can't be represented in the above NFA representation.
+
+(define (sre->procedure sre . o)
+  (define names
+    (if (and (pair? o) (pair? (cdr o))) (cadr o) (sre-names sre 1 '())))
+  (let lp ((sre sre)
+           (n 1)
+           (flags (if (pair? o) (car o) ~none))
+           (next (lambda (cnk init src str i end matches fail)
+                   (irregex-match-start-chunk-set! matches 0 (car init))
+                   (irregex-match-start-index-set! matches 0 (cdr init))
+                   (irregex-match-end-chunk-set! matches 0 src)
+                   (irregex-match-end-index-set! matches 0 i)
+		   (%irregex-match-fail-set! matches fail)
+                   matches)))
+    ;; XXXX this should be inlined
+    (define (rec sre) (lp sre n flags next))
+    (cond
+     ((pair? sre)
+      (if (string? (car sre))
+          (sre-cset->procedure
+           (sre->cset (car sre) (flag-set? flags ~case-insensitive?))
+           next)
+          (case (car sre)
+            ((~ - & /)
+             (sre-cset->procedure
+              (sre->cset sre (flag-set? flags ~case-insensitive?))
+              next))
+            ((or)
+             (case (length (cdr sre))
+               ((0) (lambda (cnk init src str i end matches fail) (fail)))
+               ((1) (rec (cadr sre)))
+               (else
+                (let* ((first (rec (cadr sre)))
+                       (rest (lp (sre-alternate (cddr sre))
+                                 (+ n (sre-count-submatches (cadr sre)))
+                                 flags
+                                 next)))
+                  (lambda (cnk init src str i end matches fail)
+                    (first cnk init src str i end matches
+                           (lambda ()
+                             (rest cnk init src str i end matches fail))))))))
+            ((w/case)
+             (lp (sre-sequence (cdr sre))
+                 n
+                 (flag-clear flags ~case-insensitive?)
+                 next))
+            ((w/nocase)
+             (lp (sre-sequence (cdr sre))
+                 n
+                 (flag-join flags ~case-insensitive?)
+                 next))
+            ((w/utf8)
+             (lp (sre-sequence (cdr sre)) n (flag-join flags ~utf8?) next))
+            ((w/noutf8)
+             (lp (sre-sequence (cdr sre)) n (flag-clear flags ~utf8?) next))
+            ((seq :)
+             (case (length (cdr sre))
+               ((0) next)
+               ((1) (rec (cadr sre)))
+               (else
+                (let ((rest (lp (sre-sequence (cddr sre))
+                                (+ n (sre-count-submatches (cadr sre)))
+                                flags
+                                next)))
+                  (lp (cadr sre) n flags rest)))))
+            ((?)
+             (let ((body (rec (sre-sequence (cdr sre)))))
+               (lambda (cnk init src str i end matches fail)
+                 (body cnk init src str i end matches
+                       (lambda () (next cnk init src str i end matches fail))))))
+            ((??)
+             (let ((body (rec (sre-sequence (cdr sre)))))
+               (lambda (cnk init src str i end matches fail)
+                 (next cnk init src str i end matches
+                       (lambda () (body cnk init src str i end matches fail))))))
+            ((*)
+             (cond
+              ((sre-empty? (sre-sequence (cdr sre)))
+               (%irregex-error "invalid sre: empty *" sre))
+              (else
+               (letrec
+                   ((body
+                     (lp (sre-sequence (cdr sre))
+                         n
+                         flags
+                         (lambda (cnk init src str i end matches fail)
+                           (body cnk init src str i end matches
+                                 (lambda ()
+                                   (next cnk init src str i end matches fail)
+                                   ))))))
+                 (lambda (cnk init src str i end matches fail)
+                   (body cnk init src str i end matches
+                         (lambda ()
+                           (next cnk init src str i end matches fail))))))))
+            ((*?)
+             (cond
+              ((sre-empty? (sre-sequence (cdr sre)))
+               (%irregex-error "invalid sre: empty *?" sre))
+              (else
+               (letrec
+                   ((body
+                     (lp (sre-sequence (cdr sre))
+                         n
+                         flags
+                         (lambda (cnk init src str i end matches fail)
+                           (next cnk init src str i end matches
+                                 (lambda ()
+                                   (body cnk init src str i end matches fail)
+                                   ))))))
+                 (lambda (cnk init src str i end matches fail)
+                   (next cnk init src str i end matches
+                         (lambda ()
+                           (body cnk init src str i end matches fail))))))))
+            ((+)
+             (lp (sre-sequence (cdr sre))
+                 n
+                 flags
+                 (rec (list '* (sre-sequence (cdr sre))))))
+            ((=)
+             (rec `(** ,(cadr sre) ,(cadr sre) ,@(cddr sre))))
+            ((>=)
+             (rec `(** ,(cadr sre) #f ,@(cddr sre))))
+            ((** **?)
+             (cond
+              ((or (and (number? (cadr sre))
+                        (number? (caddr sre))
+                        (> (cadr sre) (caddr sre)))
+                   (and (not (cadr sre)) (caddr sre)))
+               (lambda (cnk init src str i end matches fail) (fail)))
+              (else
+               (let* ((from (cadr sre))
+                      (to (caddr sre))
+                      (? (if (eq? '** (car sre)) '? '??))
+                      (* (if (eq? '** (car sre)) '* '*?))
+                      (sre (sre-sequence (cdddr sre)))
+                      (x-sre (sre-strip-submatches sre))
+                      (next (if to
+                                (if (= from to)
+                                    next
+                                    (fold (lambda (x next)
+                                            (lp `(,? ,sre) n flags next))
+                                          next
+                                          (zero-to (- to from))))
+                                (rec `(,* ,sre)))))
+                 (if (zero? from)
+                     next
+                     (lp `(seq ,@(map (lambda (x) x-sre) (zero-to (- from 1)))
+                               ,sre)
+                         n
+                         flags
+                         next))))))
+            ((word)
+             (rec `(seq bow ,@(cdr sre) eow)))
+            ((word+)
+             (rec `(seq bow (+ (& (or alphanumeric "_")
+                                  (or ,@(cdr sre)))) eow)))
+            ((posix-string)
+             (rec (string->sre (cadr sre))))
+            ((look-ahead)
+             (let ((check
+                    (lp (sre-sequence (cdr sre))
+                        n
+                        flags
+                        (lambda (cnk init src str i end matches fail) i))))
+               (lambda (cnk init src str i end matches fail)
+                 (if (check cnk init src str i end matches (lambda () #f))
+                     (next cnk init src str i end matches fail)
+                     (fail)))))
+            ((neg-look-ahead)
+             (let ((check
+                    (lp (sre-sequence (cdr sre))
+                        n
+                        flags
+                        (lambda (cnk init src str i end matches fail) i))))
+               (lambda (cnk init src str i end matches fail)
+                 (if (check cnk init src str i end matches (lambda () #f))
+                     (fail)
+                     (next cnk init src str i end matches fail)))))
+            ((look-behind neg-look-behind)
+             (let ((check
+                    (lp (sre-sequence
+                         (cons '(* any) (append (cdr sre) '(eos))))
+                        n
+                        flags
+                        (lambda (cnk init src str i end matches fail) i))))
+               (lambda (cnk init src str i end matches fail)
+                 (let* ((prev ((chunker-get-substring cnk)
+                               (car init)
+                               (cdr init)
+                               src
+                               i))
+                        (len (string-length prev))
+                        (src2 (list prev 0 len)))
+                   (if ((if (eq? (car sre) 'look-behind) (lambda (x) x) not)
+                        (check irregex-basic-string-chunker
+                               (cons src2 0) src2 prev 0 len matches (lambda () #f)))
+                       (next cnk init src str i end matches fail)
+                       (fail))))))
+            ((atomic)
+             (let ((once
+                    (lp (sre-sequence (cdr sre))
+                        n
+                        flags
+                        (lambda (cnk init src str i end matches fail) i))))
+               (lambda (cnk init src str i end matches fail)
+                 (let ((j (once cnk init src str i end matches (lambda () #f))))
+                   (if j
+                       (next cnk init src str j end matches fail)
+                       (fail))))))
+            ((if)
+             (let* ((test-submatches (sre-count-submatches (cadr sre)))
+                    (pass (lp (caddr sre) flags (+ n test-submatches) next))
+                    (fail (if (pair? (cdddr sre))
+                              (lp (cadddr sre)
+                                  (+ n test-submatches
+                                     (sre-count-submatches (caddr sre)))
+                                  flags
+                                  next)
+                              (lambda (cnk init src str i end matches fail)
+                                (fail)))))
+               (cond
+                ((or (number? (cadr sre)) (symbol? (cadr sre)))
+                 (let ((index
+                        (if (symbol? (cadr sre))
+                            (cond
+                             ((assq (cadr sre) names) => cdr)
+                             (else
+                              (%irregex-error "unknown named backref in SRE IF" sre)))
+                            (cadr sre))))
+                   (lambda (cnk init src str i end matches fail2)
+                     (if (%irregex-match-end-chunk matches index)
+                         (pass cnk init src str i end matches fail2)
+                         (fail cnk init src str i end matches fail2)))))
+                (else
+                 (let ((test (lp (cadr sre) n flags pass)))
+                   (lambda (cnk init src str i end matches fail2)
+                     (test cnk init src str i end matches
+                           (lambda () (fail cnk init src str i end matches fail2)))
+                     ))))))
+            ((backref backref-ci)
+             (let ((n (cond ((number? (cadr sre)) (cadr sre))
+                            ((assq (cadr sre) names) => cdr)
+                            (else (%irregex-error "unknown backreference" (cadr sre)))))
+                   (compare (if (or (eq? (car sre) 'backref-ci)
+                                    (flag-set? flags ~case-insensitive?))
+                                string-ci=?
+                                string=?)))
+               (lambda (cnk init src str i end matches fail)
+                 (let ((s (irregex-match-substring matches n)))
+                   (if (not s)
+                       (fail)
+                       ;; XXXX create an abstract subchunk-compare
+                       (let lp ((src src)
+                                (str str)
+                                (i i)
+                                (end end)
+                                (j 0)
+                                (len (string-length s)))
+                         (cond
+                          ((<= len (- end i))
+                           (cond
+                            ((compare (substring s j (string-length s))
+                                      (substring str i (+ i len)))
+                             (next cnk init src str (+ i len) end matches fail))
+                            (else
+                             (fail))))
+                          (else
+                           (cond
+                            ((compare (substring s j (+ j (- end i)))
+                                      (substring str i end))
+                             (let ((src2 ((chunker-get-next cnk) src)))
+                               (if src2
+                                   (lp src2
+                                       ((chunker-get-str cnk) src2)
+                                       ((chunker-get-start cnk) src2)
+                                       ((chunker-get-end cnk) src2)
+                                       (+ j (- end i))
+                                       (- len (- end i)))
+                                   (fail))))
+                            (else
+                             (fail)))))))))))
+            ((dsm)
+             (lp (sre-sequence (cdddr sre)) (+ n (cadr sre)) flags next))
+            (($ submatch)
+             (let ((body
+                    (lp (sre-sequence (cdr sre))
+                        (+ n 1)
+                        flags
+                        (lambda (cnk init src str i end matches fail)
+                          (let ((old-source
+                                 (%irregex-match-end-chunk matches n))
+                                (old-index
+                                 (%irregex-match-end-index matches n)))
+                            (irregex-match-end-chunk-set! matches n src)
+                            (irregex-match-end-index-set! matches n i)
+                            (next cnk init src str i end matches
+                                  (lambda ()
+                                    (irregex-match-end-chunk-set!
+                                     matches n old-source)
+                                    (irregex-match-end-index-set!
+                                     matches n old-index)
+                                    (fail))))))))
+               (lambda (cnk init src str i end matches fail)
+                 (let ((old-source (%irregex-match-start-chunk matches n))
+                       (old-index (%irregex-match-start-index matches n)))
+                   (irregex-match-start-chunk-set! matches n src)
+                   (irregex-match-start-index-set! matches n i)
+                   (body cnk init src str i end matches
+                         (lambda ()
+                           (irregex-match-start-chunk-set!
+                            matches n old-source)
+                           (irregex-match-start-index-set!
+                            matches n old-index)
+                           (fail)))))))
+            ((=> submatch-named)
+             (rec `(submatch ,@(cddr sre))))
+            (else
+             (%irregex-error "unknown regexp operator" sre)))))
+     ((symbol? sre)
+      (case sre
+        ((any)
+         (lambda (cnk init src str i end matches fail)
+           (if (< i end)
+               (next cnk init src str (+ i 1) end matches fail)
+               (let ((src2 ((chunker-get-next cnk) src)))
+                 (if src2
+                     (let ((str2 ((chunker-get-str cnk) src2))
+                           (i2 ((chunker-get-start cnk) src2))
+                           (end2 ((chunker-get-end cnk) src2)))
+                       (next cnk init src2 str2 (+ i2 1) end2 matches fail))
+                     (fail))))))
+        ((nonl)
+         (lambda (cnk init src str i end matches fail)
+           (if (< i end)
+               (if (not (eqv? #\newline (string-ref str i)))
+                   (next cnk init src str (+ i 1) end matches fail)
+                   (fail))
+               (let ((src2 ((chunker-get-next cnk) src)))
+                 (if src2
+                     (let ((str2 ((chunker-get-str cnk) src2))
+                           (i2 ((chunker-get-start cnk) src2))
+                           (end2 ((chunker-get-end cnk) src2)))
+                       (if (not (eqv? #\newline (string-ref str2 i2)))
+                           (next cnk init src2 str2 (+ i2 1) end2 matches fail)
+                           (fail)))
+                     (fail))))))
+        ((bos)
+         (lambda (cnk init src str i end matches fail)
+           (if (and (eq? src (car init)) (eqv? i (cdr init)))
+               (next cnk init src str i end matches fail)
+               (fail))))
+        ((bol)
+         (lambda (cnk init src str i end matches fail)
+           (if (or (and (eq? src (car init)) (eqv? i (cdr init)))
+                   (and (> i ((chunker-get-start cnk) src))
+                        (eqv? #\newline (string-ref str (- i 1)))))
+               (next cnk init src str i end matches fail)
+               (fail))))
+        ((bow)
+         (lambda (cnk init src str i end matches fail)
+           (if (and (or (if (> i ((chunker-get-start cnk) src))
+                            (not (char-alphanumeric? (string-ref str (- i 1))))
+                            (let ((ch (chunker-prev-char cnk src end)))
+                              (and ch (not (char-alphanumeric? ch)))))
+                        (and (eq? src (car init)) (eqv? i (cdr init))))
+                    (if (< i end)
+                        (char-alphanumeric? (string-ref str i))
+                        (let ((next ((chunker-get-next cnk) src)))
+                          (and next
+                               (char-alphanumeric?
+                                (string-ref ((chunker-get-str cnk) next)
+                                            ((chunker-get-start cnk) next)))))))
+               (next cnk init src str i end matches fail)
+               (fail))))
+        ((eos)
+         (lambda (cnk init src str i end matches fail)
+           (if (and (>= i end) (not ((chunker-get-next cnk) src)))
+               (next cnk init src str i end matches fail)
+               (fail))))
+        ((eol)
+         (lambda (cnk init src str i end matches fail)
+           (if (if (< i end)
+                   (eqv? #\newline (string-ref str i))
+                   (let ((src2 ((chunker-get-next cnk) src)))
+                     (if (not src2)
+                         #t
+                         (eqv? #\newline
+                               (string-ref ((chunker-get-str cnk) src2)
+                                           ((chunker-get-start cnk) src2))))))
+               (next cnk init src str i end matches fail)
+               (fail))))
+        ((eow)
+         (lambda (cnk init src str i end matches fail)
+           (if (and (if (< i end)
+                        (not (char-alphanumeric? (string-ref str i)))
+                        (let ((ch (chunker-next-char cnk src)))
+                          (or (not ch) (not (char-alphanumeric? ch)))))
+                    (if (> i ((chunker-get-start cnk) src))
+                        (char-alphanumeric? (string-ref str (- i 1)))
+                        (let ((prev (chunker-prev-char cnk init src)))
+                          (or (not prev) (char-alphanumeric? prev)))))
+               (next cnk init src str i end matches fail)
+               (fail))))
+        ((nwb)  ;; non-word-boundary
+         (lambda (cnk init src str i end matches fail)
+           (let ((c1 (if (< i end)
+                         (string-ref str i)
+                         (chunker-next-char cnk src)))
+                 (c2 (if (> i ((chunker-get-start cnk) src))
+                         (string-ref str (- i 1))
+                         (chunker-prev-char cnk init src))))
+             (if (and c1 c2
+                      (if (char-alphanumeric? c1)
+                          (char-alphanumeric? c2)
+                          (not (char-alphanumeric? c2))))
+                 (next cnk init src str i end matches fail)
+                 (fail)))))
+        ((epsilon)
+         next)
+        (else
+         (let ((cell (assq sre sre-named-definitions)))
+           (if cell
+               (rec (cdr cell))
+               (%irregex-error "unknown regexp" sre))))))
+     ((char? sre)
+      (if (flag-set? flags ~case-insensitive?)
+          ;; case-insensitive
+          (lambda (cnk init src str i end matches fail)
+            (if (>= i end)
+                (let lp ((src2 ((chunker-get-next cnk) src)))
+                  (if src2
+                      (let ((str2 ((chunker-get-str cnk) src2))
+                            (i2 ((chunker-get-start cnk) src2))
+                            (end2 ((chunker-get-end cnk) src2)))
+                        (if (>= i2 end2)
+                            (lp ((chunker-get-next cnk) src2))
+                            (if (char-ci=? sre (string-ref str2 i2))
+                                (next cnk init src2 str2 (+ i2 1) end2
+                                      matches fail)
+                                (fail))))
+                      (fail)))
+                (if (char-ci=? sre (string-ref str i))
+                    (next cnk init src str (+ i 1) end matches fail)
+                    (fail))))
+          ;; case-sensitive
+          (lambda (cnk init src str i end matches fail)
+            (if (>= i end)
+                (let lp ((src2 ((chunker-get-next cnk) src)))
+                  (if src2
+                      (let ((str2 ((chunker-get-str cnk) src2))
+                            (i2 ((chunker-get-start cnk) src2))
+                            (end2 ((chunker-get-end cnk) src2)))
+                        (if (>= i2 end2)
+                            (lp ((chunker-get-next cnk) src2))
+                            (if (char=? sre (string-ref str2 i2))
+                                (next cnk init src2 str2 (+ i2 1) end2
+                                      matches fail)
+                                (fail))))
+                      (fail)))
+                (if (char=? sre (string-ref str i))
+                    (next cnk init src str (+ i 1) end matches fail)
+                    (fail))))
+          ))
+     ((string? sre)
+      (rec (sre-sequence (string->list sre)))
+;; XXXX reintroduce faster string matching on chunks
+;;       (if (flag-set? flags ~case-insensitive?)
+;;           (rec (sre-sequence (string->list sre)))
+;;           (let ((len (string-length sre)))
+;;             (lambda (cnk init src str i end matches fail)
+;;               (if (and (<= (+ i len) end)
+;;                        (%substring=? sre str 0 i len))
+;;                   (next str (+ i len) matches fail)
+;;                   (fail)))))
+      )
+     (else
+      (%irregex-error "unknown regexp" sre)))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Character Sets
+;;
+;; Simple character sets as lists of ranges, as used in the NFA/DFA
+;; compilation.  This is not especially efficient, but is portable and
+;; scalable for any range of character sets.
+
+(define (sre-cset->procedure cset next)
+  (lambda (cnk init src str i end matches fail)
+    (if (< i end)
+        (if (cset-contains? cset (string-ref str i))
+            (next cnk init src str (+ i 1) end matches fail)
+            (fail))
+        (let ((src2 ((chunker-get-next cnk) src)))
+          (if src2
+              (let ((str2 ((chunker-get-str cnk) src2))
+                    (i2 ((chunker-get-start cnk) src2))
+                    (end2 ((chunker-get-end cnk) src2)))
+                (if (cset-contains? cset (string-ref str2 i2))
+                    (next cnk init src2 str2 (+ i2 1) end2 matches fail)
+                    (fail)))
+              (fail))))))
+
+(define (plist->alist ls)
+  (let lp ((ls ls) (res '()))
+    (if (null? ls)
+        (reverse res)
+        (lp (cddr ls) (cons (cons (car ls) (cadr ls)) res)))))
+
+(define (alist->plist ls)
+  (let lp ((ls ls) (res '()))
+    (if (null? ls)
+        (reverse res)
+        (lp (cdr ls) (cons (cdar ls) (cons (caar ls) res))))))
+
+(define (sre->cset sre . o)
+  (let lp ((sre sre) (ci? (and (pair? o) (car o))))
+    (define (rec sre) (lp sre ci?))
+    (cond
+     ((pair? sre)
+      (if (string? (car sre))
+          (if ci?
+              (cset-case-insensitive (string->list (car sre)))
+              (string->list (car sre)))
+          (case (car sre)
+            ((~)
+             (cset-complement
+              (fold cset-union (rec (cadr sre)) (map rec (cddr sre)))))
+            ((&)
+             (fold cset-intersection (rec (cadr sre)) (map rec (cddr sre))))
+            ((-)
+             (fold (lambda (x res) (cset-difference res x))
+                   (rec (cadr sre))
+                   (map rec (cddr sre))))
+            ((/)
+             (let ((res (plist->alist (sre-flatten-ranges (cdr sre)))))
+               (if ci?
+                   (cset-case-insensitive res)
+                   res)))
+            ((or)
+             (fold cset-union (rec (cadr sre)) (map rec (cddr sre))))
+            ((w/case)
+             (lp (sre-alternate (cdr sre)) #f))
+            ((w/nocase)
+             (lp (sre-alternate (cdr sre)) #t))
+            (else
+             (%irregex-error "not a valid sre char-set operator" sre)))))
+     ((char? sre) (rec (list (string sre))))
+     ((string? sre) (rec (list sre)))
+     (else
+      (let ((cell (assq sre sre-named-definitions)))
+        (if cell
+            (rec (cdr cell))
+            (%irregex-error "not a valid sre char-set" sre)))))))
+
+;; another debugging utility
+;; (define (cset->sre cset)
+;;   (let lp ((ls cset) (chars '()) (ranges '()))
+;;     (cond
+;;      ((null? ls)
+;;       (sre-alternate
+;;        (append
+;;         (if (pair? chars) (list (list (list->string chars))) '())
+;;         (if (pair? ranges) (list (cons '/ (alist->plist ranges))) '()))))
+;;      ((char? (car ls)) (lp (cdr ls) (cons (car ls) chars) ranges))
+;;      (else (lp (cdr ls) chars (cons (car ls) ranges))))))
+
+(define (cset-contains? cset ch)
+  (find (lambda (x)
+          (or (eqv? x ch)
+              (and (pair? x) (char<=? (car x) ch) (char<=? ch (cdr x)))))
+        cset))
+
+(define (cset-range x)
+  (if (char? x) (cons x x) x))
+
+(define (char-ranges-overlap? a b)
+  (if (pair? a)
+      (if (pair? b)
+          (or (and (char<=? (car a) (cdr b)) (char<=? (car b) (cdr a)))
+              (and (char<=? (cdr b) (car a)) (char<=? (cdr a) (car b))))
+          (and (char<=? (car a) b) (char<=? b (cdr a))))
+      (if (pair? b)
+          (char-ranges-overlap? b a)
+          (eqv? a b))))
+
+(define (char-ranges-union a b)
+  (cons (if (char<=? (car a) (car b)) (car a) (car b))
+        (if (char>=? (cdr a) (cdr b)) (cdr a) (cdr b))))
+
+(define (cset-union a b)
+  (cond ((null? b) a)
+        ((find-tail (lambda (x) (char-ranges-overlap? x (car b))) a)
+         => (lambda (ls)
+              (cset-union
+               (cset-union (append (take-up-to a ls) (cdr ls))
+                           (list (char-ranges-union (cset-range (car ls))
+                                                    (cset-range (car b)))))
+               (cdr b))))
+        (else (cset-union (cons (car b) a) (cdr b)))))
+
+(define (cset-difference a b)
+  (cond ((null? b) a)
+        ((not (car b)) (cset-difference a (cdr b)))
+        ((find-tail (lambda (x) (char-ranges-overlap? x (car b))) a)
+         => (lambda (ls)
+              (apply
+               (lambda (left1 left2 same right1 right2)
+                 (let* ((a (append (take-up-to a ls) (cdr ls)))
+                        (a (if left1 (cons left1 a) a))
+                        (a (if left2 (cons left2 a) a))
+                        (b (if right1 (cset-union b (list right1)) b))
+                        (b (if right2 (cset-union b (list right2)) b)))
+                   (cset-difference a b)))
+               (intersect-char-ranges (cset-range (car ls))
+                                      (cset-range (car b))))))
+        (else (cset-difference a (cdr b)))))
+
+(define (cset-intersection a b)
+  (let intersect ((a a) (b b) (res '()))
+    (cond ((null? b) res)
+          ((find-tail (lambda (x) (char-ranges-overlap? x (car b))) a)
+           => (lambda (ls)
+                (apply
+                 (lambda (left1 left2 same right1 right2)
+                   (let* ((a (append (take-up-to a ls) (cdr ls)))
+                          (a (if left1 (cons left1 a) a))
+                          (a (if left2 (cons left2 a) a))
+                          (b (if right1 (cset-union b (list right1)) b))
+                          (b (if right2 (cset-union b (list right2)) b)))
+                     (intersect a b (cset-union res (list same)))))
+                 (intersect-char-ranges (cset-range (car ls))
+                                        (cset-range (car b))))))
+          (else (intersect a (cdr b) res)))))
+
+(define (cset-complement a)
+  (cset-difference (sre->cset *all-chars*) a))
+
+(define (cset-case-insensitive a)
+  (let lp ((ls a) (res '()))
+    (cond ((null? ls) (reverse res))
+          ((and (char? (car ls)) (char-alphabetic? (car ls)))
+           (let ((c2 (char-altcase (car ls)))
+                 (res (cons (car ls) res)))
+             (lp (cdr ls) (if (cset-contains? res c2) res (cons c2 res)))))
+          ((and (pair? (car ls))
+                (char-alphabetic? (caar ls))
+                (char-alphabetic? (cdar ls)))
+           (lp (cdr ls)
+               (cset-union (cset-union res (list (car ls)))
+                           (list (cons (char-altcase (caar ls))
+                                       (char-altcase (cdar ls)))))))
+          (else (lp (cdr ls) (cset-union res (list (car ls))))))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Match and Replace Utilities
+
+(define (irregex-fold/fast irx kons knil str . o)
+  (if (not (string? str)) (%irregex-error 'irregex-fold "not a string" str))
+  (let* ((irx (irregex irx))
+         (matches (irregex-new-matches irx))
+         (finish (or (and (pair? o) (car o)) (lambda (i acc) acc)))
+         (start (if (and (pair? o) (pair? (cdr o))) (cadr o) 0))
+         (end (if (and (pair? o) (pair? (cdr o)) (pair? (cddr o)))
+                  (caddr o)
+                  (string-length str))))
+    (if (not (integer? start)) (%irregex-error 'irregex-fold "not an integer" start))
+    (if (not (integer? end)) (%irregex-error 'irregex-fold "not an integer" end))
+    (irregex-match-chunker-set! matches irregex-basic-string-chunker)
+    (let lp ((i start) (acc knil))
+      (if (>= i end)
+          (finish i acc)
+          (let ((m (irregex-search/matches
+                    irx
+                    irregex-basic-string-chunker
+                    (list str i end)
+                    i
+                    matches)))
+            (if (not m)
+                (finish i acc)
+                (let* ((end (%irregex-match-end-index m 0))
+                       (acc (kons i m acc)))
+                  (irregex-reset-matches! matches)
+                  (lp end acc))))))))
+
+(define (irregex-fold irx kons . args)
+  (if (not (procedure? kons)) (%irregex-error 'irregex-fold "not a procedure" kons))
+  (let ((kons2 (lambda (i m acc) (kons i (irregex-copy-matches m) acc))))
+    (apply irregex-fold/fast irx kons2 args)))
+
+(define (irregex-fold/chunked/fast irx kons knil cnk start . o)
+  (let* ((irx (irregex irx))
+         (matches (irregex-new-matches irx))
+         (finish (or (and (pair? o) (car o)) (lambda (src i acc) acc)))
+         (i (if (and (pair? o) (pair? (cdr o)))
+                (cadr o)
+                ((chunker-get-start cnk) start))))
+    (if (not (integer? i)) (%irregex-error 'irregex-fold/chunked "not an integer" i))
+    (irregex-match-chunker-set! matches cnk)
+    (let lp ((start start) (i i) (acc knil))
+      (if (not start)
+          (finish start i acc)
+          (let ((m (irregex-search/matches irx cnk start i matches)))
+            (if (not m)
+                (finish start i acc)
+                (let* ((acc (kons start i m acc))
+                       (end-src (%irregex-match-end-chunk m 0))
+                       (end-index (%irregex-match-end-index m 0)))
+                  (irregex-reset-matches! matches)
+                  (lp end-src end-index acc))))))))
+
+(define (irregex-fold/chunked irx kons . args)
+  (if (not (procedure? kons)) (%irregex-error 'irregex-fold/chunked "not a procedure" kons))
+  (let ((kons2 (lambda (s i m acc) (kons s i (irregex-copy-matches m) acc))))
+    (apply irregex-fold/chunked/fast irx kons2 args)))
+
+(define (irregex-replace irx str . o)
+  (if (not (string? str)) (%irregex-error 'irregex-replace "not a string" str))
+  (let ((m (irregex-search irx str)))
+    (and
+     m
+     (string-cat-reverse
+      (cons (substring str (%irregex-match-end-index m 0) (string-length str))
+            (append (irregex-apply-match m o)
+                    (list (substring str 0 (%irregex-match-start-index m 0)))
+                    ))))))
+
+(define (irregex-replace/all irx str . o)
+  (if (not (string? str)) (%irregex-error 'irregex-replace/all "not a string" str))
+  (irregex-fold/fast
+   irx
+   (lambda (i m acc)
+     (let ((m-start (%irregex-match-start-index m 0)))
+       (append (irregex-apply-match m o)
+               (if (>= i m-start)
+                   acc
+                   (cons (substring str i m-start) acc)))))
+   '()
+   str
+   (lambda (i acc)
+     (let ((end (string-length str)))
+       (string-cat-reverse (if (>= i end)
+                               acc
+                               (cons (substring str i end) acc)))))))
+
+(define (irregex-apply-match m ls)
+  (let lp ((ls ls) (res '()))
+    (if (null? ls)
+        res
+        (cond
+         ((integer? (car ls))
+          (lp (cdr ls)
+              (cons (or (irregex-match-substring m (car ls)) "") res)))
+         ((procedure? (car ls))
+          (lp (cdr ls) (cons ((car ls) m) res)))
+         ((symbol? (car ls))
+          (case (car ls)
+            ((pre)
+             (lp (cdr ls)
+                 (cons (substring (car (%irregex-match-start-chunk m 0))
+                                  0
+                                  (%irregex-match-start-index m 0))
+                       res)))
+            ((post)
+             (let ((str (car (%irregex-match-start-chunk m 0))))
+               (lp (cdr ls)
+                   (cons (substring str
+                                    (%irregex-match-end-index m 0)
+                                    (string-length str))
+                         res))))
+            (else
+             (cond
+              ((assq (car ls) (irregex-match-names m))
+               => (lambda (x) (lp (cons (cdr x) (cdr ls)) res)))
+              (else
+               (%irregex-error "unknown match replacement" (car ls)))))))
+         (else
+          (lp (cdr ls) (cons (car ls) res)))))))
+
+(define (irregex-extract irx str . o)
+  (if (not (string? str)) (%irregex-error 'irregex-extract "not a string" str))
+  (apply irregex-fold/fast
+         irx
+         (lambda (i m a) (cons (irregex-match-substring m) a))
+         '()
+         str
+         (lambda (i a) (reverse a))
+         o))
+
+(define (irregex-split irx str . o)
+  (if (not (string? str)) (%irregex-error 'irregex-split "not a string" str))
+  (let ((start (if (pair? o) (car o) 0))
+        (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str))))
+    (irregex-fold/fast
+     irx
+     (lambda (i m a)
+       (if (= i (%irregex-match-start-index m 0))
+           a
+           (cons (substring str i (%irregex-match-start-index m 0)) a)))
+     '()
+     str
+     (lambda (i a)
+       (reverse (if (= i end) a (cons (substring str i end) a))))
+     start
+     end)))
diff --git a/irregex-utils.scm b/irregex-utils.scm
new file mode 100644
index 00000000..8332791d
--- /dev/null
+++ b/irregex-utils.scm
@@ -0,0 +1,154 @@
+;;;; irregex-utils.scm
+;;
+;; Copyright (c) 2010 Alex Shinn.  All rights reserved.
+;; BSD-style license: http://synthcode.com/license.txt
+
+(define rx-special-chars
+  "\\|[](){}.*+?^$#")
+
+(define (string-scan-char str c . o)
+  (let ((end (string-length str)))
+    (let scan ((i (if (pair? o) (car o) 0)))
+      (cond ((= i end) #f)
+            ((eqv? c (string-ref str i)) i)
+            (else (scan (+ i 1)))))))
+
+(define (irregex-quote str)
+  (list->string
+   (let loop ((ls (string->list str)) (res '()))
+     (if (null? ls)
+         (reverse res)
+         (let ((c (car ls)))
+           (if (string-scan-char rx-special-chars c)
+               (loop (cdr ls) (cons c (cons #\\ res)))
+               (loop (cdr ls) (cons c res))))))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define (irregex-opt ls)
+  (define (make-alt ls)
+    (cond ((null? (cdr ls)) (car ls))
+          ((every char? ls) (list (list->string ls)))
+          (else (cons 'or ls))))
+  (define (make-seq ls)
+    (cond ((null? (cdr ls)) (car ls))
+          ((every (lambda (x) (or (string? x) (char? x))) ls)
+           (apply string-append (map (lambda (x) (if (char? x) (string x) x)) ls)))
+          (else (cons 'seq ls))))
+  (cond
+   ((null? ls) "")
+   ((null? (cdr ls)) (car ls))
+   (else
+    (let ((chars (make-vector 256 '())))
+      (let lp1 ((ls ls) (empty? #f))
+        (if (null? ls)
+            (let lp2 ((i 0) (res '()))
+              (if (= i 256)
+                  (let ((res (make-alt (reverse res))))
+                    (if empty? `(? ,res) res))
+                  (let ((c (integer->char i))
+                        (opts (vector-ref chars i)))
+                    (lp2 (+ i 1)
+                         (cond
+                          ((null? opts) res)
+                          ((equal? opts '("")) `(,c ,@res))
+                          (else `(,(make-seq (list c (irregex-opt opts)))
+                                  ,@res)))))))
+            (let* ((str (car ls))
+                   (len (string-length str)))
+              (if (zero? len)
+                  (lp1 (cdr ls) #t)
+                  (let ((i (char->integer (string-ref str 0))))
+                    (vector-set!
+                     chars
+                     i
+                     (cons (substring str 1 len) (vector-ref chars i)))
+                    (lp1 (cdr ls) empty?))))))))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define (cset->string ls)
+  (let ((out (open-output-string)))
+    (let lp ((ls ls))
+      (cond
+       ((pair? ls)
+        (cond
+         ((pair? (car ls))
+          (display (irregex-quote (string (caar ls))) out)
+          (write-char #\- out)
+          (display (irregex-quote (string (cdar ls))) out))
+         (else (display (irregex-quote (string (car ls))) out)))
+        (lp (cdr ls)))))
+    (get-output-string out)))
+
+(define (sre->string obj)
+  (let ((out (open-output-string)))
+    (let lp ((x obj))
+      (cond
+       ((pair? x)
+        (case (car x)
+          ((: seq)
+           (cond
+            ((and (pair? (cddr x)) (pair? (cddr x)) (not (eq? x obj)))
+             (display "(?:" out) (for-each lp (cdr x)) (display ")" out))
+            (else (for-each lp (cdr x)))))
+          ((submatch)
+           (display "(" out) (for-each lp (cdr x)) (display ")" out))
+          ((submatch-named)
+           (display "(?<" out) (display (cadr x) out) (display ">" out)
+           (for-each lp (cddr x)) (display ")" out))
+          ((or)
+           (display "(?:" out)
+           (lp (cadr x))
+           (for-each (lambda (x) (display "|" out) (lp x)) (cddr x))
+           (display ")" out))
+          ((* + ? *? ??)
+           (cond
+            ((pair? (cddr x))
+             (display "(?:" out) (for-each lp (cdr x)) (display ")" out))
+            (else (lp (cadr x))))
+           (display (car x) out))
+          ((not)
+           (cond
+            ((and (pair? (cadr x)) (eq? 'cset (caadr x)))
+             (display "[^" out)
+             (display (cset->string (cdadr x)) out)
+             (display "]" out))
+            (else (error "can't represent general 'not' in strings" x))))
+          ((cset)
+           (display "[" out)
+           (display (cset->string (cdr x)) out)
+           (display "]" out))
+          ((- & / ~)
+           (cond
+            ((or (eq? #\~ (car x))
+                 (and (eq? '- (car x)) (pair? (cdr x)) (eq? 'any (cadr x))))
+             (display "[^" out)
+             (display (cset->string (if (eq? #\~ (car x)) (cdr x) (cddr x))) out)
+             (display "]" out))
+            (else
+             (lp `(cset ,@(sre->cset x))))))
+          ((w/case w/nocase)
+           (display "(?" out)
+           (if (eq? (car x) 'w/case) (display "-" out))
+           (display ":" out)
+           (for-each lp (cdr x))
+           (display ")" out))
+          (else
+           (if (string? (car x))
+               (lp `(cset ,@(string->list (car x))))
+               (error "unknown sre operator" x)))))
+       ((symbol? x)
+        (case x
+          ((bos bol) (display "^" out))
+          ((eos eol) (display "$" out))
+          ((any nonl) (display "." out))
+          (else (error "unknown sre symbol" x))))
+       ((string? x)
+        (display (irregex-quote x) out))
+       ((char? x)
+        (display (irregex-quote (string x)) out))
+       (else
+        (error "unknown sre pattern" x))))
+    (get-output-string out)))
+
diff --git a/irregex.import.scm b/irregex.import.scm
index 7fc3bde7..4f2a81a6 100644
--- a/irregex.import.scm
+++ b/irregex.import.scm
@@ -26,11 +26,50 @@
 
 (##sys#register-primitive-module
  'irregex
- '(irregex string->irregex sre->irregex string->sre
-    irregex? irregex-match-data?
-    irregex-new-matches irregex-reset-matches!
-    irregex-match-start irregex-match-end irregex-match-substring
-    irregex-search irregex-search/matches irregex-match irregex-match-string
-    irregex-fold irregex-replace irregex-replace/all irregex-apply-match
-    irregex-dfa irregex-dfa/search irregex-dfa/extract
-    irregex-nfa irregex-flags irregex-submatches irregex-lengths irregex-names))
+ '(irregex 
+   irregex-apply-match
+   irregex-dfa 
+   irregex-dfa/extract
+   irregex-dfa/search
+   irregex-extract
+   irregex-flags
+   irregex-fold
+   irregex-fold/chunked
+   irregex-lengths
+   irregex-match
+   irregex-match?
+   irregex-match-data?
+   irregex-match-end
+   irregex-match-end-chunk
+   irregex-match-end-index
+   irregex-match-names
+   irregex-match-num-submatches
+   irregex-match-start
+   irregex-match-start-chunk
+   irregex-match-start-index
+   irregex-match-string
+   irregex-match-subchunk
+   irregex-match-substring
+   irregex-match-valid-index?
+   irregex-match/chunked
+   irregex-names
+   irregex-new-matches
+   irregex-nfa
+   irregex-num-submatches
+   irregex-opt
+   irregex-quote
+   irregex-replace
+   irregex-replace/all
+   irregex-reset-matches!
+   irregex-search
+   irregex-search/chunked
+   irregex-search/matches
+   irregex-split
+   irregex?
+   make-irregex-chunker
+   maybe-string->sre
+   sre->irregex
+   sre->string
+   string->irregex 
+   string->sre
+   ))
diff --git a/irregex.scm b/irregex.scm
index 5d0f77e7..fb4cf21e 100644
--- a/irregex.scm
+++ b/irregex.scm
@@ -1,2718 +1,254 @@
-;;;; irregex.scm -- IrRegular Expressions
-;;
-;; Copyright (c) 2005-2008 Alex Shinn.  All rights reserved.
-;; BSD-style license: http://synthcode.com/license.txt
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; At this moment there was a loud ring at the bell, and I could
-;; hear Mrs. Hudson, our landlady, raising her voice in a wail of
-;; expostulation and dismay.
-;;
-;; "By heaven, Holmes," I said, half rising, "I believe that
-;; they are really after us."
-;;
-;; "No, it's not quite so bad as that.  It is the unofficial
-;; force, -- the Baker Street irregulars."
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; History
-;;
-;; 0.6.2: 2008/07/26 - minor bugfixes, allow global disabling of utf8 mode,
-;;                     friendlier error messages in parsing, \Q..\E support
-;; 0.6.1: 2008/07/21 - added utf8 mode, more utils, bugfixes
-;;   0.6: 2008/05/01 - most of PCRE supported
-;;   0.5: 2008/04/24 - fully portable R4RS, many PCRE features implemented
-;;   0.4: 2008/04/17 - rewriting NFA to use efficient closure compilation,
-;;                     normal strings only, but all of the spencer tests pass
-;;   0.3: 2008/03/10 - adding DFA converter (normal strings only)
-;;   0.2: 2005/09/27 - adding irregex-opt (like elisp's regexp-opt) utility
-;;   0.1: 2005/08/18 - simple NFA interpreter over abstract chunked strings
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define irregex-tag '*irregex-tag*)
-
-(define (make-irregex dfa dfa/search dfa/extract nfa flags
-                      submatches lengths names)
-  (vector irregex-tag dfa dfa/search dfa/extract nfa flags
-          submatches lengths names))
-
-(define (irregex? obj)
-  (and (vector? obj)
-       (= 9 (vector-length obj))
-       (eq? irregex-tag (vector-ref obj 0))))
-
-(define (irregex-dfa x) (vector-ref x 1))
-(define (irregex-dfa/search x) (vector-ref x 2))
-(define (irregex-dfa/extract x) (vector-ref x 3))
-(define (irregex-nfa x) (vector-ref x 4))
-(define (irregex-flags x) (vector-ref x 5))
-(define (irregex-submatches x) (vector-ref x 6))
-(define (irregex-lengths x) (vector-ref x 7))
-(define (irregex-names x) (vector-ref x 8))
-
-(define (irregex-new-matches irx)
-  (make-irregex-match #f (irregex-submatches irx) (irregex-names irx)))
-(define (irregex-reset-matches! m)
-  (do ((i (- (vector-length m) 1) (- i 1)))
-      ((<= i 3) m)
-    (vector-set! m i #f)))
-
-(define irregex-match-tag '*irregex-match-tag*)
-
-(define (irregex-match-data? obj)
-  (and (vector? obj)
-       (>= (vector-length obj) 5)
-       (eq? irregex-match-tag (vector-ref obj 0))))
-
-(define (make-irregex-match str count names)
-  (let ((res (make-vector (+ (* 2 (+ 1 count)) 3) #f)))
-    (vector-set! res 0 irregex-match-tag)
-    (vector-set! res 1 str)
-    (vector-set! res 2 names)
-    res))
-
-(define (irregex-match-num-submatches m)
-  (- (quotient (- (vector-length m) 3) 2) 1))
-
-(define (irregex-match-string m)
-  (vector-ref m 1))
-(define (irregex-match-names m)
-  (vector-ref m 2))
-(define (irregex-match-string-set! m str)
-  (vector-set! m 1 str))
-
-(define (irregex-match-start-index m n)
-  (vector-ref m (+ 3 (* n 2))))
-(define (irregex-match-end-index m n)
-  (vector-ref m (+ 4 (* n 2))))
-
-(define (irregex-match-start-index-set! m n start)
-  (vector-set! m (+ 3 (* n 2)) start))
-(define (irregex-match-end-index-set! m n end)
-  (vector-set! m (+ 4 (* n 2)) end))
-
-(define (irregex-match-index m opt)
-  (if (pair? opt)
-      (cond ((number? (car opt)) (car opt))
-            ((assq (car opt) (irregex-match-names m)) => cdr)
-            (else (error "unknown match name" (car opt))))
-      0))
-
-(define (irregex-match-valid-index? m n)
-  (and (< (+ 3 (* n 2)) (vector-length m))
-       (vector-ref m (+ 4 (* n 2)))))
-
-(define (irregex-match-substring m . opt)
-  (let ((n (irregex-match-index m opt)))
-    (and (irregex-match-valid-index? m n)
-         (substring (irregex-match-string m)
-                    (vector-ref m (+ 3 (* n 2)))
-                    (vector-ref m (+ 4 (* n 2)))))))
-
-(define (irregex-match-start m . opt)
-  (let ((n (irregex-match-index m opt)))
-    (and (irregex-match-valid-index? m n)
-         (vector-ref m (+ 3 (* n 2))))))
-
-(define (irregex-match-end m . opt)
-  (irregex-match-valid-index? m (irregex-match-index m opt)))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; string utilities
-
-;;;; Unicode version (skip surrogates)
-(define *all-chars*
-  `(/ ,(integer->char 0) ,(integer->char #xD7FF)
-      ,(integer->char #xE000) ,(integer->char #x10FFFF)))
-
-;;;; ASCII version, offset to not assume 0-255
-;; (define *all-chars* `(/ ,(integer->char (- (char->integer #\space) 32)) ,(integer->char (+ (char->integer #\space) 223))))
-
-;; set to #f to ignore even an explicit request for utf8 handling
-(define *allow-utf8-mode?* #t)
-
-;; (define *named-char-properties* '())
-
-(define (string-scan-char str c . o)
-  (let ((end (string-length str)))
-    (let scan ((i (if (pair? o) (car o) 0)))
-      (cond ((= i end) #f)
-            ((eqv? c (string-ref str i)) i)
-            (else (scan (+ i 1)))))))
-
-(define (string-scan-char-escape str c . o)
-  (let ((end (string-length str)))
-    (let scan ((i (if (pair? o) (car o) 0)))
-      (cond ((= i end) #f)
-            ((eqv? c (string-ref str i)) i)
-            ((eqv? c #\\) (scan (+ i 2)))
-            (else (scan (+ i 1)))))))
-
-(define (string-scan-pred str pred . o)
-  (let ((end (string-length str)))
-    (let scan ((i (if (pair? o) (car o) 0)))
-      (cond ((= i end) #f)
-            ((pred (string-ref str i)) i)
-            (else (scan (+ i 1)))))))
-
-(define (string-split-char str c)
-  (let ((end (string-length str)))
-    (let lp ((i 0) (from 0) (res '()))
-      (define (collect) (cons (substring str from i) res))
-      (cond ((>= i end) (reverse (collect)))
-            ((eqv? c (string-ref str i)) (lp (+ i 1) (+ i 1) (collect)))
-            (else (lp (+ i 1) from res))))))
-
-(define (char-alphanumeric? c)
-  (or (char-alphabetic? c) (char-numeric? c)))
-
-;; SRFI-13 extracts
-
-(define (%%string-copy! to tstart from fstart fend)
-  (do ((i fstart (+ i 1))
-       (j tstart (+ j 1)))
-      ((>= i fend))
-    (string-set! to j (string-ref from i))))
-
-(define (string-cat-reverse string-list)
-  (string-cat-reverse/aux
-   (fold (lambda (s a) (+ (string-length s) a)) 0 string-list)
-   string-list))
-
-(define (string-cat-reverse/aux len string-list)
-  (let ((res (make-string len)))
-    (let lp ((i len) (ls string-list))
-      (if (pair? ls)
-	  (let* ((s (car ls))
-		 (slen (string-length s))
-		 (i (- i slen)))
-	    (%%string-copy! res i s 0 slen)
-	    (lp i (cdr ls)))))
-    res))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; list utilities
-
-;; like the one-arg IOTA case
-(define (zero-to n)
-  (if (<= n 0)
-      '()
-      (let lp ((i (- n 1)) (res '()))
-        (if (zero? i) (cons 0 res) (lp (- i 1) (cons i res))))))
-
-;; take the head of list FROM up to but not including TO, which must
-;; be a tail of the list
-(define (take-up-to from to)
-  (let lp ((ls from) (res '()))
-    (if (and (pair? ls) (not (eq? ls to)))
-        (lp (cdr ls) (cons (car ls) res))
-        (reverse res))))
-
-;; SRFI-1 extracts (simplified 1-ary versions)
-
-(define (find pred ls)
-  (cond ((find-tail pred ls) => car)
-	(else #f)))
-
-(define (find-tail pred ls)
-  (let lp ((ls ls))
-    (cond ((null? ls) #f)
-          ((pred (car ls)) ls)
-          (else (lp (cdr ls))))))
-
-(define (last ls)
-  (if (not (pair? ls))
-      (error "can't take last of empty list" ls)
-      (let lp ((ls ls))
-        (if (pair? (cdr ls))
-            (lp (cdr ls))
-            (car ls)))))
-
-(define (any pred ls)
-  (and (pair? ls)
-       (let lp ((head (car ls)) (tail (cdr ls)))
-         (if (null? tail)
-             (pred head)
-             (or (pred head) (lp (car tail) (cdr tail)))))))
-
-(define (every pred ls)
-  (or (null? ls)
-      (let lp ((head (car ls))  (tail (cdr ls)))
-        (if (null? tail)
-            (pred head)
-            (and (pred head) (lp (car tail) (cdr tail)))))))
-
-(define (fold kons knil ls)
-  (let lp ((ls ls) (res knil))
-    (if (null? ls)
-        res
-        (lp (cdr ls) (kons (car ls) res)))))
-
-(define (filter pred ls)
-  (let lp ((ls ls) (res '()))
-    (if (null? ls)
-        (reverse res)
-        (lp (cdr ls) (if (pred (car ls)) (cons (car ls) res) res)))))
-
-(define (remove pred ls)
-  (let lp ((ls ls) (res '()))
-    (if (null? ls)
-        (reverse res)
-        (lp (cdr ls) (if (pred (car ls)) res (cons (car ls) res))))))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; flags
-
-(define (bit-shr n i)
-  (quotient n (expt 2 i)))
-
-(define (bit-shl n i)
-  (* n (expt 2 i)))
-
-(define (bit-not n) (- #xFFFF n))
-
-(define (bit-ior a b)
-  (cond
-   ((zero? a) b)
-   ((zero? b) a)
-   (else
-    (+ (if (or (odd? a) (odd? b)) 1 0)
-       (* 2 (bit-ior (quotient a 2) (quotient b 2)))))))
-
-(define (bit-and a b)
-  (cond
-   ((zero? a) 0)
-   ((zero? b) 0)
-   (else
-    (+ (if (and (odd? a) (odd? b)) 1 0)
-       (* 2 (bit-and (quotient a 2) (quotient b 2)))))))
-
-(define (flag-set? flags i)
-  (= i (bit-and flags i)))
-(define (flag-join a b)
-  (if b (bit-ior a b) a))
-(define (flag-clear a b)
-  (bit-and a (bit-not b)))
-
-(define ~none 0)
-(define ~searcher? 1)
-(define ~consumer? 2)
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; parsing
-
-(define ~save? 1)
-(define ~case-insensitive? 2)
-(define ~multi-line? 4)
-(define ~single-line? 8)
-(define ~ignore-space? 16)
-(define ~utf8? 32)
-
-(define (symbol-list->flags ls)
-  (let lp ((ls ls) (res ~none))
-    (if (not (pair? ls))
-        res
-        (lp (cdr ls)
-            (flag-join
-             res
-             (case (car ls)
-               ((i ci case-insensitive) ~case-insensitive?)
-               ((m multi-line) ~multi-line?)
-               ((s single-line) ~single-line?)
-               ((x ignore-space) ~ignore-space?)
-               ((u utf8) ~utf8?)
-               (else #f)))))))
-
-(define (string->sre str . o)
-  (let ((end (string-length str))
-        (flags (symbol-list->flags o)))
-
-    (let lp ((i 0) (from 0) (flags flags) (res '()) (st '()))
-
-      ;; handle case sensitivity at the literal char/string level
-      (define (cased-char ch)
-        (if (and (flag-set? flags ~case-insensitive?)
-                 (char-alphabetic? ch))
-            `(or ,ch ,(char-altcase ch))
-            ch))
-      (define (cased-string str)
-        (if (flag-set? flags ~case-insensitive?)
-            (sre-sequence (map cased-char (string->list str)))
-            str))
-      ;; accumulate the substring from..i as literal text
-      (define (collect)
-        (if (= i from) res (cons (cased-string (substring str from i)) res)))
-      ;; like collect but breaks off the last single character when
-      ;; collecting literal data, as the argument to ?/*/+ etc.
-      (define (collect/single)
-        (let* ((utf8? (flag-set? flags ~utf8?))
-               (j (if (and utf8? (> i 1))
-                      (utf8-backup-to-initial-char str (- i 1))
-                      (- i 1))))
-          (cond
-           ((< j from)
-            res)
-           (else
-            (let ((c (cased-char (if utf8?
-                                     (utf8-string-ref str j (- i j) )
-                                     (string-ref str j)))))
-              (cond
-               ((= j from)
-                (cons c res))
-               (else
-                (cons c
-                      (cons (cased-string (substring str from j))
-                            res)))))))))
-      ;; collects for use as a result, reversing and grouping OR
-      ;; terms, and some ugly tweaking of `function-like' groups and
-      ;; conditionals
-      (define (collect/terms)
-        (let* ((ls (collect))
-               (func
-                (and (pair? ls)
-                     (memq (last ls)
-                           '(atomic if look-ahead neg-look-ahead
-                                    look-behind neg-look-behind submatch-named
-                                    w/utf8 w/noutf8))))
-               (prefix (if (and func (eq? 'submatch-named (car func)))
-                           (list 'submatch-named (cadr (reverse ls)))
-                           (and func (list (car func)))))
-               (ls (if func
-                       (if (eq? 'submatch-named (car func))
-                           (reverse (cddr (reverse ls)))
-                           (reverse (cdr (reverse ls))))
-                       ls)))
-          (let lp ((ls ls) (term '()) (res '()))
-            (define (shift)
-              (cons (sre-sequence term) res))
-            (cond
-             ((null? ls)
-              (let* ((res (sre-alternate (shift)))
-                     (res (if (flag-set? flags ~save?)
-                              (list 'submatch res)
-                              res)))
-                (if prefix
-                    (if (eq? 'if (car prefix))
-                        (cond
-                         ((not (pair? res))
-                          'epsilon)
-                         ((memq (car res)
-                                '(look-ahead neg-look-ahead
-                                             look-behind neg-look-behind))
-                          res)
-                         ((eq? 'seq (car res))
-                          `(if ,(cadr res)
-                               ,(if (pair? (cdr res))
-                                    (sre-sequence (cddr res))
-                                    'epsilon)))
-                         (else
-                          `(if ,(cadadr res)
-                               ,(if (pair? (cdr res))
-                                    (sre-sequence (cddadr res))
-                                    'epsilon)
-                               ,(sre-alternate
-                                 (if (pair? (cdr res)) (cddr res) '())))))
-                        `(,@prefix ,res))
-                    res)))
-             ((eq? 'or (car ls)) (lp (cdr ls) '() (shift)))
-             (else (lp (cdr ls) (cons (car ls) term) res))))))
-      (define (save)
-        (cons (cons flags (collect)) st))
-
-      ;; main parsing
-      (if (>= i end)
-          (if (pair? st)
-              (error "unterminated parenthesis in regexp" str)
-              (collect/terms))
-          (let ((c (string-ref str i)))
-            (case c
-              ((#\.)
-               (lp (+ i 1) (+ i 1) flags
-                   (cons (if (flag-set? flags ~single-line?) 'any 'nonl)
-                         (collect))
-                   st))
-              ((#\?)
-               (let ((res (collect/single)))
-                 (if (null? res)
-                     (error "? can't follow empty sre" str res)
-                     (let ((x (car res)))
-                       (lp (+ i 1)
-                           (+ i 1)
-                           flags
-                           (cons
-                            (if (pair? x)
-                                (case (car x)
-                                  ((*)  `(*? ,@(cdr x)))
-                                  ((+)  `(**? 1 #f ,@(cdr x)))
-                                  ((?)  `(?? ,@(cdr x)))
-                                  ((**) `(**? ,@(cdr x)))
-                                  ((=)  `(**? ,(cadr x) ,@(cdr x)))
-                                  ((>=)  `(**? ,(cadr x) #f ,@(cddr x)))
-                                  (else `(? ,x)))
-                                `(? ,x))
-                            (cdr res))
-                           st)))))
-              ((#\+ #\*)
-               (let* ((res (collect/single))
-                      (x (car res))
-                      (op (string->symbol (string c))))
-                 (cond
-                  ((sre-repeater? x)
-                   (error "duplicate repetition (e.g. **) in sre" str res))
-                  ((sre-empty? x)
-                   (error "can't repeat empty sre (e.g. ()*)" str res))
-                  (else
-                   (lp (+ i 1) (+ i 1) flags
-                       (cons (list op x) (cdr res))
-                       st)))))
-              ((#\()
-               (cond
-                ((>= (+ i 1) end)
-                 (error "unterminated parenthesis in regexp" str))
-                ((not (eqv? #\? (string-ref str (+ i 1))))
-                 (lp (+ i 1) (+ i 1) (flag-join flags ~save?) '() (save)))
-                ((>= (+ i 2) end)
-                 (error "unterminated parenthesis in regexp" str))
-                (else
-                 (case (string-ref str (+ i 2))
-                   ((#\#)
-                    (let ((j (string-scan-char str #\) (+ i 3))))
-                      (lp (+ j i) (+ j 1) flags (collect) st)))
-                   ((#\:)
-                    (lp (+ i 3) (+ i 3) (flag-clear flags ~save?) '() (save)))
-                   ((#\=)
-                    (lp (+ i 3) (+ i 3) (flag-clear flags ~save?)
-                        '(look-ahead) (save)))
-                   ((#\!)
-                    (lp (+ i 3) (+ i 3) (flag-clear flags ~save?)
-                        '(neg-look-ahead) (save)))
-                   ((#\<)
-                    (cond
-                     ((>= (+ i 3) end)
-                      (error "unterminated parenthesis in regexp" str))
-                     (else
-                      (case (string-ref str (+ i 3))
-                        ((#\=)
-                         (lp (+ i 4) (+ i 4) (flag-clear flags ~save?)
-                             '(look-behind) (save)))
-                        ((#\!)
-                         (lp (+ i 4) (+ i 4) (flag-clear flags ~save?)
-                             '(neg-look-behind) (save)))
-                        (else
-                         (let ((j (and (char-alphabetic?
-                                        (string-ref str (+ i 3)))
-                                       (string-scan-char str #\> (+ i 4)))))
-                           (if j
-                               (lp (+ j 1) (+ j 1) (flag-clear flags ~save?)
-                                   `(,(string->symbol (substring str (+ i 3) j))
-                                     submatch-named)
-                                   (save))
-                               (error "invalid (?< sequence" str))))))))
-                   ((#\>)
-                    (lp (+ i 3) (+ i 3) (flag-clear flags ~save?)
-                        '(atomic) (save)))
-                   ;;((#\' #\P) ; named subpatterns
-                   ;; )
-                   ;;((#\R) ; recursion
-                   ;; )
-                   ((#\()
-                    (cond
-                     ((>= (+ i 3) end)
-                      (error "unterminated parenthesis in regexp" str))
-                     ((char-numeric? (string-ref str (+ i 3)))
-                      (let* ((j (string-scan-char str #\) (+ i 3)))
-                             (n (string->number (substring str (+ i 3) j))))
-                        (if (not n)
-                            (error "invalid conditional reference" str)
-                            (lp (+ j 1) (+ j 1) (flag-clear flags ~save?)
-                                `(,n if) (save)))))
-                     ((char-alphabetic? (string-ref str (+ i 3)))
-                      (let* ((j (string-scan-char str #\) (+ i 3)))
-                             (s (string->symbol (substring str (+ i 3) j))))
-                        (lp (+ j 1) (+ j 1) (flag-clear flags ~save?)
-                            `(,s if) (save))))
-                     (else
-                      (lp (+ i 2) (+ i 2) (flag-clear flags ~save?)
-                          '(if) (save)))))
-                   ((#\{)
-                    (error "unsupported Perl-style cluster" str))
-                   (else
-                    (let ((old-flags flags))
-                      (let lp2 ((j (+ i 2)) (flags flags) (invert? #f))
-                        (define (join x)
-                          ((if invert? flag-clear flag-join) flags x))
-                        (define (new-res res)
-                          (let ((before (flag-set? old-flags ~utf8?))
-                                (after (flag-set? flags ~utf8?)))
-                            (if (eq? before after)
-                                res
-                                (cons (if after 'w/utf8 'w/noutf8) res))))
-                        (cond
-                         ((>= j end)
-                          (error "incomplete cluster" str i))
-                         (else
-                          (case (string-ref str j)
-                            ((#\i)
-                             (lp2 (+ j 1) (join ~case-insensitive?) invert?))
-                            ((#\m)
-                             (lp2 (+ j 1) (join ~multi-line?) invert?))
-                            ((#\x)
-                             (lp2 (+ j 1) (join ~ignore-space?) invert?))
-                            ((#\u)
-                             (if *allow-utf8-mode?*
-                                 (lp2 (+ j 1) (join ~utf8?) invert?)
-                                 (lp2 (+ j 1) flags invert?)))
-                            ((#\-)
-                             (lp2 (+ j 1) flags (not invert?)))
-                            ((#\))
-                             (lp (+ j 1) (+ j 1) flags (new-res (collect))
-                                 st))
-                            ((#\:)
-                             (lp (+ j 1) (+ j 1) flags (new-res '())
-                                 (cons (cons old-flags (collect)) st)))
-                            (else
-                             (error "unknown regex cluster modifier" str)
-                             )))))))))))
-              ((#\))
-               (if (null? st)
-                   (error "too many )'s in regexp" str)
-                   (lp (+ i 1)
-                       (+ i 1)
-                       (caar st)
-                       (cons (collect/terms) (cdar st))
-                       (cdr st))))
-              ((#\[)
-               (apply
-                (lambda (sre j)
-                  (lp (+ j 1) (+ j 1) flags (cons sre (collect)) st))
-                (string-parse-cset str (+ i 1) flags)))
-              ((#\{)
-               (if (or (>= (+ i 1) end)
-                       (not (or (char-numeric? (string-ref str (+ i 1)))
-                                (eqv? #\, (string-ref str (+ i 1))))))
-                   (lp (+ i 1) from flags res st)
-                   (let* ((res (collect/single))
-                          (x (car res))
-                          (tail (cdr res))
-                          (j (string-scan-char str #\} (+ i 1)))
-                          (s2 (string-split-char (substring str (+ i 1) j) #\,))
-                          (n (or (string->number (car s2)) 0))
-                          (m (and (pair? (cdr s2)) (string->number (cadr s2)))))
-                     (cond
-                      ((null? (cdr s2))
-                       (lp (+ j 1) (+ j 1) flags `((= ,n ,x) ,@tail) st))
-                      (m
-                       (lp (+ j 1) (+ j 1) flags `((** ,n ,m ,x) ,@tail) st))
-                      (else
-                       (lp (+ j 1) (+ j 1) flags `((>= ,n ,x) ,@tail) st)
-                       )))))
-              ((#\\)
-               (cond
-                ((>= (+ i 1) end)
-                 (error "incomplete escape sequence" str))
-                (else
-                 (let ((c (string-ref str (+ i 1))))
-                   (case c
-                     ((#\d)
-                      (lp (+ i 2) (+ i 2) flags `(numeric ,@(collect)) st))
-                     ((#\D)
-                      (lp (+ i 2) (+ i 2) flags `((~ numeric) ,@(collect)) st))
-                     ((#\s)
-                      (lp (+ i 2) (+ i 2) flags `(space ,@(collect)) st))
-                     ((#\S)
-                      (lp (+ i 2) (+ i 2) flags `((~ space) ,@(collect)) st))
-                     ((#\w)
-                      (lp (+ i 2) (+ i 2) flags
-                          `((or alphanumeric ("_")) ,@(collect)) st))
-                     ((#\W)
-                      (lp (+ i 2) (+ i 2) flags
-                          `((~ (or alphanumeric ("_"))) ,@(collect)) st))
-                     ((#\b)
-                      (lp (+ i 2) (+ i 2) flags
-                          `((or bow eow) ,@(collect)) st))
-                     ((#\B)
-                      (lp (+ i 2) (+ i 2) flags `(nwb ,@(collect)) st))
-                     ((#\A)
-                      (lp (+ i 2) (+ i 2) flags `(bos ,@(collect)) st))
-                     ((#\Z)
-                      (lp (+ i 2) (+ i 2) flags
-                          `((? #\newline) eos ,@(collect)) st))
-                     ((#\z)
-                      (lp (+ i 2) (+ i 2) flags `(eos ,@(collect)) st))
-                     ((#\R)
-                      (lp (+ i 2) (+ i 2) flags `(newline ,@(collect)) st))
-                     ((#\K)
-                      (lp (+ i 2) (+ i 2) flags `(reset ,@(collect)) st))
-                     ;; these two are from Emacs and TRE, but not PCRE
-                     ((#\<)
-                      (lp (+ i 2) (+ i 2) flags `(bow ,@(collect)) st))
-                     ((#\>)
-                      (lp (+ i 2) (+ i 2) flags `(eow ,@(collect)) st))
-                     ((#\x)
-                      (apply
-                       (lambda (ch j)
-                         (lp (+ j 1) (+ j 1) flags `(,ch ,@(collect)) st))
-                       (string-parse-hex-escape str (+ i 2) end)))
-                     ((#\k)
-                      (let ((c (string-ref str (+ i 2))))
-                        (if (not (memv c '(#\< #\{ #\')))
-                            (error "bad \\k usage, expected \\k<...>" str)
-                            (let* ((terminal (char-mirror c))
-                                   (j (string-scan-char str terminal (+ i 2)))
-                                   (s (and j (substring str (+ i 3) j)))
-                                   (backref
-                                    (if (flag-set? flags ~case-insensitive?)
-                                        'backref-ci
-                                        'backref)))
-                              (if (not j)
-                                  (error "interminated named backref" str)
-                                  (lp (+ j 1) (+ j 1) flags
-                                      `((,backref ,(string->symbol s))
-                                        ,@(collect))
-                                      st))))))
-                     ((#\Q)  ;; \Q..\E escapes
-                      (let ((res (collect)))
-                        (let lp2 ((j (+ i 2)))
-                          (cond
-                           ((>= j end)
-                            (lp j (+ i 2) flags res st))
-                           ((eqv? #\\ (string-ref str j))
-                            (cond
-                             ((>= (+ j 1) end)
-                              (lp (+ j 1) (+ i 2) flags res st))
-                             ((eqv? #\E (string-ref str (+ j 1)))
-                              (lp (+ j 2) (+ j 2) flags
-                                  (cons (substring str (+ i 2) j) res) st))
-                             (else
-                              (lp2 (+ j 2)))))
-                           (else
-                            (lp2 (+ j 1)))))))
-                     ;;((#\p)  ; XXXX unicode properties
-                     ;; )
-                     ;;((#\P)
-                     ;; )
-                     (else
-                      (cond
-                       ((char-numeric? c)
-                        (let* ((j (or (string-scan-pred
-                                       str
-                                       (lambda (c) (not (char-numeric? c)))
-                                       (+ i 2))
-                                      end))
-                               (backref
-                                (if (flag-set? flags ~case-insensitive?)
-                                    'backref-ci
-                                    'backref))
-                               (res `((,backref ,(string->number
-                                                  (substring str (+ i 1) j)))
-                                      ,@(collect))))
-                          (lp j j flags res st)))
-                       ((char-alphabetic? c)
-                        (let ((cell (assv c posix-escape-sequences)))
-                          (if cell
-                              (lp (+ i 2) (+ i 2) flags
-                                  (cons (cdr cell) (collect)) st)
-                              (error "unknown escape sequence" str c))))
-                       (else
-                        (lp (+ i 2) (+ i 1) flags (collect) st)))))))))
-              ((#\|)
-               (lp (+ i 1) (+ i 1) flags (cons 'or (collect)) st))
-              ((#\^)
-               (let ((sym (if (flag-set? flags ~multi-line?) 'bol 'bos)))
-                 (lp (+ i 1) (+ i 1) flags (cons sym (collect)) st)))
-              ((#\$)
-               (let ((sym (if (flag-set? flags ~multi-line?) 'eol 'eos)))
-                 (lp (+ i 1) (+ i 1) flags (cons sym (collect)) st)))
-              ((#\space)
-               (if (flag-set? flags ~ignore-space?)
-                   (lp (+ i 1) (+ i 1) flags (collect) st)
-                   (lp (+ i 1) from flags res st)))
-              ((#\#)
-               (if (flag-set? flags ~ignore-space?)
-                   (let ((j (or (string-scan-char str #\newline (+ i 1))
-                                (- end 1))))
-                     (lp (+ j 1) (+ j 1) flags (collect) st))
-                   (lp (+ i 1) from flags res st)))
-              (else
-               (lp (+ i 1) from flags res st))))))))
-
-(define posix-escape-sequences
-  `((#\n . #\newline)
-    (#\r . ,(integer->char (+ (char->integer #\newline) 3)))
-    (#\t . ,(integer->char (- (char->integer #\newline) 1)))
-    (#\a . ,(integer->char (- (char->integer #\newline) 3)))
-    (#\e . ,(integer->char (+ (char->integer #\newline) #x11)))
-    (#\f . ,(integer->char (+ (char->integer #\newline) 2)))
-    ))
-
-(define (char-altcase c)
-  (if (char-upper-case? c) (char-downcase c) (char-upcase c)))
-
-(define (char-mirror c)
-  (case c ((#\<) #\>) ((#\{) #\}) ((#\() #\)) ((#\[) #\]) (else c)))
-
-(define (string-parse-hex-escape str i end)
-  (cond
-   ((>= i end)
-    (error "incomplete hex escape" str i))
-   ((eqv? #\{ (string-ref str i))
-    (let ((j (string-scan-char-escape str #\} (+ i 1))))
-      (if (not j)
-          (error "incomplete hex brace escape" str i)
-          (let* ((s (substring str (+ i 1) j))
-                 (n (string->number s 16)))
-            (if n
-                (list (integer->char n) j)
-                (error "bad hex brace escape" s))))))
-   ((>= (+ i 1) end)
-    (error "incomplete hex escape" str i))
-   (else
-    (let* ((s (substring str i (+ i 2)))
-           (n (string->number s 16)))
-      (if n
-          (list (integer->char n) (+ i 2))
-          (error "bad hex escape" s))))))
-
-(define (string-parse-cset str start flags)
-  (let ((end (string-length str))
-        (invert? (eqv? #\^ (string-ref str start)))
-        (utf8? (flag-set? flags ~utf8?)))
-    (define (go i chars ranges)
-      (if (>= i end)
-          (error "incomplete char set")
-          (let ((c (string-ref str i)))
-            (case c
-              ((#\])
-               (if (and (null? chars) (null? ranges))
-                   (go (+ i 1) (cons #\] chars) ranges)
-                   (let ((ci? (flag-set? flags ~case-insensitive?))
-                         (hi-chars (if utf8? (filter high-char? chars) '()))
-                         (chars (if utf8? (remove high-char? chars) chars)))
-                     (list
-                      ((lambda (res)
-                         (if invert? (cons '~ res) (sre-alternate res)))
-                       (append
-                        hi-chars
-                        (if (pair? chars)
-                            (list
-                             (list (list->string
-                                    ((if ci?
-                                         cset-case-insensitive
-                                         (lambda (x) x))
-                                     (reverse chars)))))
-                            '())
-                        (if (pair? ranges)
-                            (let ((res (if ci?
-                                           (cset-case-insensitive
-                                            (reverse ranges))
-                                           (reverse ranges))))
-                              (list (cons '/ (alist->plist res))))
-                            '())))
-                      i))))
-              ((#\-)
-               (cond
-                ((or (= i start)
-                     (and (= i (+ start 1)) (eqv? #\^ (string-ref str start)))
-                     (eqv? #\] (string-ref str (+ i 1))))
-                 (go (+ i 1) (cons c chars) ranges))
-                ((null? chars)
-                 (error "bad char-set"))
-                (else
-                 (let* ((c1 (car chars))
-                        (c2 (string-ref str (+ i 1))))
-                   (apply
-                    (lambda (c2 j)
-                      (if (char<? c2 c1)
-                          (error "inverted range in char-set" c1 c2)
-                          (go j (cdr chars) (cons (cons c1 c2) ranges))))
-                    (cond
-                     ((and (eqv? #\\ c2) (assv c2 posix-escape-sequences))
-                      => (lambda (x) (list (cdr x) (+ i 3))))
-                     ((and (eqv? #\\ c2)
-                           (eqv? (string-ref str (+ i 2)) #\x))
-                      (string-parse-hex-escape str (+ i 3) end))
-                     ((and utf8? (<= #x80 (char->integer c2) #xFF))
-                      (let ((len (utf8-start-char->length c2)))
-                        (list (utf8-string-ref str (+ i 1) len) (+ i 1 len))))
-                     (else
-                      (list c2 (+ i 2)))))))))
-              ((#\[)
-               (let* ((inv? (eqv? #\^ (string-ref str (+ i 1))))
-                      (i2 (if inv? (+ i 2) (+ i 1))))
-                 (case (string-ref str i2)
-                   ((#\:)
-                    (let ((j (string-scan-char str #\: (+ i2 1))))
-                      (if (or (not j) (not (eqv? #\] (string-ref str (+ j 1)))))
-                          (error "incomplete character class" str)
-                          (let* ((cset (sre->cset
-                                        (string->symbol
-                                         (substring str (+ i2 1) j))))
-                                 (cset (if inv? (cset-complement cset) cset)))
-                            (go (+ j 2)
-                                (append (filter char? cset) chars)
-                                (append (filter pair? cset) ranges))))))
-                   ((#\= #\.)
-                    (error "collating sequences not supported" str))
-                   (else
-                    (go (+ i 1) (cons #\[ chars) ranges)))))
-              ((#\\)
-               (let ((c (string-ref str (+ i 1))))
-                 (case c
-                   ((#\d #\D #\s #\S #\w #\W)
-                    (let ((cset (sre->cset (string->sre (string #\\ c)))))
-                      (go (+ i 2)
-                          (append (filter char? cset) chars)
-                          (append (filter pair? cset) ranges))))
-                   ((#\x)
-                    (apply
-                     (lambda (ch j)
-                       (go j (cons ch chars) ranges))
-                     (string-parse-hex-escape str (+ i 2) end)))
-                   (else
-                    (let ((c (cond ((assv c posix-escape-sequences) => cdr)
-                                   (else c))))
-                      (go (+ i 2)
-                          (cons (string-ref str (+ i 1)) (cons c chars))
-                          ranges))))))
-              (else
-               (if (and utf8? (<= #x80 (char->integer c) #xFF))
-                   (let ((len (utf8-start-char->length c)))
-                     (go (+ i len)
-                         (cons (utf8-string-ref str i len) chars)
-                         ranges))
-                   (go (+ i 1) (cons c chars) ranges)))))))
-    (if invert?
-        (go (+ start 1)
-            (if (flag-set? flags ~multi-line?) '(#\newline) '())
-            '())
-        (go start '() '()))))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; utf8 utilities
-
-;; Here are some hairy optimizations that need to be documented
-;; better.  Thanks to these, we never do any utf8 processing once the
-;; regexp is compiled.
-
-;; two chars: ab..ef
-;;            a[b..xFF]|[b-d][x80..xFF]|e[x80..xFF]
-
-;; three chars: abc..ghi
-;;              ab[c..xFF]|a[d..xFF][x80..xFF]|
-;;              [b..f][x80..xFF][x80..xFF]|
-;;              g[x80..g][x80..xFF]|gh[x80..i]
-
-;; four chars: abcd..ghij
-;;             abc[d..xFF]|ab[d..xFF][x80..xFF]|a[c..xFF][x80..xFF][x80..xFF]|
-;;             [b..f][x80..xFF][x80..xFF][x80..xFF]|
-;;             g[x80..g][x80..xFF][x80..xFF]|gh[x80..h][x80..xFF]|ghi[x80..j]
-
-(define (high-char? c) (<= #x80 (char->integer c)))
-
-;; number of total bytes in a utf8 char given the 1st byte
-
-(define utf8-start-char->length
-  (let ((table '#(
-1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 0x
-1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 1x
-1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 2x
-1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 3x
-1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 4x
-1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 5x
-1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 6x
-1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 7x
-1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 8x
-1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 9x
-1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; ax
-1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; bx
-2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ; cx
-2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ; dx
-3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 ; ex
-4 4 4 4 4 4 4 4 5 5 5 5 6 6 0 0 ; fx
-)))
-    (lambda (c) (vector-ref table (char->integer c)))))
-
-(define (utf8-string-ref str i len)
-  (define (byte n) (char->integer (string-ref str n)))
-  (case len
-    ((1) ; shouldn't happen in this module
-     (string-ref str i))
-    ((2)
-     (integer->char
-      (+ (bit-shl (bit-and (byte i) #b00011111) 6)
-         (bit-and (byte (+ i 1)) #b00111111))))
-    ((3)
-     (integer->char
-      (+ (bit-shl (bit-and (byte i) #b00001111) 12)
-         (bit-shl (bit-and (byte (+ i 1)) #b00111111) 6)
-         (bit-and (byte (+ i 2)) #b00111111))))
-    ((4)
-     (integer->char
-      (+ (bit-shl (bit-and (byte i) #b00000111) 18)
-         (bit-shl (bit-and (byte (+ i 1)) #b00111111) 12)
-         (bit-shl (bit-and (byte (+ i 2)) #b00111111) 6)
-         (bit-and (byte (+ i 3)) #b00111111))))
-    (else
-     (error "invalid utf8 length" str len i))))
-
-(define (utf8-backup-to-initial-char str i)
-  (let lp ((i i))
-    (if (= i 0)
-        0
-        (let ((c (char->integer (string-ref str i))))
-          (if (or (< c #x80) (>= c #xC0))
-              i
-              (lp (- i 1)))))))
-
-(define (utf8-lowest-digit-of-length len)
-  (case len
-    ((1) 0) ((2) #xC0) ((3) #xE0) ((4) #xF0)
-    (else (error "invalid utf8 length" len))))
-
-(define (utf8-highest-digit-of-length len)
-  (case len
-    ((1) #x7F) ((2) #xDF) ((3) #xEF) ((4) #xF7)
-    (else (error "invalid utf8 length" len))))
-
-(define (char->utf8-list c)
-  (let ((i (char->integer c)))
-    (cond
-     ((<= i #x7F) (list i))
-     ((<= i #x7FF)
-      (list (bit-ior #b11000000 (bit-shr i 6))
-            (bit-ior #b10000000 (bit-and i #b111111))))
-     ((<= i #xFFFF)
-      (list (bit-ior #b11100000 (bit-shr i 12))
-            (bit-ior #b10000000 (bit-and (bit-shr i 6) #b111111))
-            (bit-ior #b10000000 (bit-and i #b111111))))
-     ((<= i #x1FFFFF)
-      (list (bit-ior #b11110000 (bit-shr i 18))
-            (bit-ior #b10000000 (bit-and (bit-shr i 12) #b111111))
-            (bit-ior #b10000000 (bit-and (bit-shr i 6) #b111111))
-            (bit-ior #b10000000 (bit-and i #b111111))))
-     (else (error "unicode codepoint out of range:" i)))))
-
-(define (unicode-range->utf8-pattern lo hi)
-  (let ((lo-ls (char->utf8-list lo))
-        (hi-ls (char->utf8-list hi)))
-    (if (not (= (length lo-ls) (length hi-ls)))
-        (sre-alternate (list (unicode-range-climb-digits lo-ls hi-ls)
-                             (unicode-range-up-to hi-ls)))
-        (let lp ((lo-ls lo-ls) (hi-ls hi-ls))
-          (cond
-           ((null? lo-ls)
-            '())
-           ((= (car lo-ls) (car hi-ls))
-            (sre-sequence
-             (list (integer->char (car lo-ls))
-                   (lp (cdr lo-ls) (cdr hi-ls)))))
-           ((= (+ (car lo-ls) 1) (car hi-ls))
-            (sre-alternate (list (unicode-range-up-from lo-ls)
-                                 (unicode-range-up-to hi-ls))))
-           (else
-            (sre-alternate (list (unicode-range-up-from lo-ls)
-                                 (unicode-range-middle lo-ls hi-ls)
-                                 (unicode-range-up-to hi-ls)))))))))
-
-(define (unicode-range-helper one ls prefix res)
-  (if (null? ls)
-      res
-      (unicode-range-helper
-       one
-       (cdr ls)
-       (cons (car ls) prefix)
-       (cons (sre-sequence
-              `(,@(map integer->char prefix)
-                ,(one (car ls))
-                ,@(map (lambda (_)
-                         `(/ ,(integer->char #x80)
-                             ,(integer->char #xFF)))
-                       (cdr ls))))
-             res))))
-
-(define (unicode-range-up-from lo-ls)
-  (sre-sequence
-   (list (integer->char (car lo-ls))
-         (sre-alternate
-          (unicode-range-helper
-           (lambda (c)
-             `(/ ,(integer->char (+ (car lo-ls) 1)) ,(integer->char #xFF)))
-           (cdr (reverse (cdr lo-ls)))
-           '()
-           (list
-            (sre-sequence
-             (append
-              (map integer->char (reverse (cdr (reverse (cdr lo-ls)))))
-              `((/ ,(integer->char (last lo-ls))
-                   ,(integer->char #xFF)))))))))))
-
-(define (unicode-range-up-to hi-ls)
-  (sre-sequence
-   (list (integer->char (car hi-ls))
-         (sre-alternate
-          (unicode-range-helper
-           (lambda (c)
-             `(/ ,(integer->char #x80) ,(integer->char (- (car hi-ls) 1))))
-           (cdr (reverse (cdr hi-ls)))
-           '()
-           (list
-            (sre-sequence
-             (append
-              (map integer->char (reverse (cdr (reverse (cdr hi-ls)))))
-              `((/ ,(integer->char #x80)
-                   ,(integer->char (last hi-ls))))))))))))
-
-(define (unicode-range-climb-digits lo-ls hi-ls)
-  (let ((lo-len (length lo-ls)))
-    (sre-alternate
-     (append
-      (list
-       (sre-sequence
-        (cons `(/ ,(integer->char (car lo-ls))
-                  ,(integer->char (if (<= (car lo-ls) #x7F) #x7F #xFF)))
-              (map (lambda (_)
-                     `(/ ,(integer->char #x80) ,(integer->char #xFF)))
-                   (cdr lo-ls)))))
-      (map
-       (lambda (i)
-         (sre-sequence
-          (cons
-           `(/ ,(integer->char (utf8-lowest-digit-of-length (+ i lo-len 1)))
-               ,(integer->char (utf8-highest-digit-of-length (+ i lo-len 1))))
-           (map (lambda (_)
-                  `(/ ,(integer->char #x80) ,(integer->char #xFF)))
-                (zero-to (+ i lo-len))))))
-       (zero-to (- (length hi-ls) lo-len 1)))
-      (list
-       (sre-sequence
-        (cons `(/ ,(integer->char
-                    (utf8-lowest-digit-of-length
-                     (utf8-start-char->length
-                      (integer->char (- (car hi-ls) 1)))))
-                  ,(integer->char (- (car hi-ls) 1)))
-              (map (lambda (_)
-                     `(/ ,(integer->char #x80) ,(integer->char #xFF)))
-                   (cdr hi-ls)))))))))
-
-(define (unicode-range-middle lo-ls hi-ls)
-  (let ((lo (integer->char (+ (car lo-ls) 1)))
-        (hi (integer->char (- (car hi-ls) 1))))
-    (sre-sequence
-     (cons (if (char=? lo hi) lo `(/ ,lo ,hi))
-           (map (lambda (_) `(/ ,(integer->char #x80) ,(integer->char #xFF)))
-                (cdr lo-ls))))))
-
-(define (cset->utf8-pattern cset)
-  (let lp ((ls cset) (alts '()) (lo-cset '()))
-    (cond
-     ((null? ls)
-      (sre-alternate (append (reverse alts)
-                             (if (null? lo-cset)
-                                 '()
-                                 (list (cons '/ (reverse lo-cset)))))))
-     ((char? (car ls))
-      (if (high-char? (car ls))
-          (lp (cdr ls) (cons (car ls) alts) lo-cset)
-          (lp (cdr ls) alts (cons (car ls) lo-cset))))
-     (else
-      (if (or (high-char? (caar ls))  (high-char? (cdar ls)))
-          (lp (cdr ls)
-              (cons (unicode-range->utf8-pattern (caar ls) (cdar ls)) alts)
-              lo-cset)
-          (lp (cdr ls) alts (cons (cdar ls) (cons (caar ls) lo-cset))))))))
-
-(define (sre-adjust-utf8 sre flags)
-  (let adjust ((sre sre)
-               (utf8? (flag-set? flags ~utf8?))
-               (ci? (flag-set? flags ~case-insensitive?)))
-    (define (rec sre) (adjust sre utf8? ci?))
-    (cond
-     ((pair? sre)
-      (case (car sre)
-        ((w/utf8) (adjust (sre-sequence (cdr sre)) #t ci?))
-        ((w/noutf8) (adjust (sre-sequence (cdr sre)) #f ci?))
-        ((w/case)
-         (cons (car sre) (map (lambda (s) (adjust s utf8? #f)) (cdr sre))))
-        ((w/nocase)
-         (cons (car sre) (map (lambda (s) (adjust s utf8? #t)) (cdr sre))))
-        ((/ ~ & -)
-         (if (not utf8?)
-             sre
-             (let ((cset (sre->cset sre ci?)))
-               (if (any (lambda (x)
-                          (if (pair? x)
-                              (or (high-char? (car x)) (high-char? (cdr x)))
-                              (high-char? x)))
-                        cset)
-                   (if ci?
-                       (list 'w/case (cset->utf8-pattern cset))
-                       (cset->utf8-pattern cset))
-                   sre))))
-        ((*)
-         (case (sre-sequence (cdr sre))
-           ;; special case optimization: .* w/utf8 == .* w/noutf8
-           ((any) '(* any))
-           ((nonl) '(* nonl))
-           (else (cons '* (map rec (cdr sre))))))
-        (else
-         (cons (car sre) (map rec (cdr sre))))))
-     (else
-      (case sre
-        ((any) 'utf8-any)
-        ((nonl) 'utf8-nonl)
-        (else
-         (if (and utf8? (char? sre) (high-char? sre))
-             (sre-sequence (map integer->char (char->utf8-list sre)))
-             sre)))))))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; compilation
-
-(define (irregex x . o)
-  (cond
-   ((irregex? x) x)
-   ((string? x) (apply string->irregex x o))
-   (else (apply sre->irregex x o))))
-
-(define (string->irregex str . o)
-  (apply sre->irregex (apply string->sre str o) o))
-
-(define (sre->irregex sre . o)
-  (let* ((pat-flags (symbol-list->flags o))
-         (sre (if *allow-utf8-mode?*
-                  (sre-adjust-utf8 sre pat-flags)
-                  sre))
-         (searcher? (sre-searcher? sre))
-         (sre-dfa (if searcher? (sre-remove-initial-bos sre) sre))
-         (dfa-limit (cond ((memq 'small o) 1) ((memq 'fast o) 50) (else 10)))
-         (dfa/search
-          (if searcher?
-              #t
-              (cond ((sre->nfa `(seq (* any) ,sre-dfa) pat-flags)
-                     => (lambda (nfa) (nfa->dfa nfa (* dfa-limit (length nfa)))))
-                    (else #f))))
-         (dfa (cond ((and dfa/search (sre->nfa sre-dfa pat-flags))
-                     => (lambda (nfa) (nfa->dfa nfa (* dfa-limit (length nfa)))))
-                    (else #f)))
-         (extractor (and dfa dfa/search (sre-match-extractor sre-dfa)))
-         (submatches (sre-count-submatches sre-dfa))
-         (names (sre-names sre-dfa 1 '()))
-         (lens (sre-length-ranges sre-dfa names))
-         (flags (flag-join
-                 (flag-join ~none (and searcher? ~searcher?))
-                 (and (sre-consumer? sre) ~consumer?))))
-    (cond
-     (dfa
-      (make-irregex dfa dfa/search extractor #f flags submatches lens names))
-     (else
-      (let ((f (sre->procedure sre pat-flags names)))
-        (make-irregex #f #f #f f flags submatches lens names))))))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; sre analysis
-
-;; returns #t if the sre can ever be empty
-(define (sre-empty? sre)
-  (if (pair? sre)
-      (case (car sre)
-        ((* ? look-ahead look-behind neg-look-ahead neg-look-behind) #t)
-        ((**) (or (not (number? (cadr sre))) (zero? (cadr sre))))
-        ((or) (any sre-empty? (cdr sre)))
-        ((: seq submatch + atomic) (every sre-empty? (cdr sre)))
-        (else #f))
-      (memq sre '(epsilon bos eos bol eol bow eow commit))))
-
-(define (sre-any? sre)
-  (or (eq? sre 'any)
-      (and (pair? sre)
-           (case (car sre)
-             ((seq : submatch)
-              (and (pair? (cdr sre)) (null? (cddr sre)) (sre-any? (cadr sre))))
-             ((or) (every sre-any? (cdr sre)))
-             (else #f)))))
-
-(define (sre-repeater? sre)
-  (and (pair? sre)
-       (or (memq (car sre) '(* +))
-           (and (memq (car sre) '(submatch seq :))
-                (pair? (cdr sre))
-                (null? (cddr sre))
-                (sre-repeater? (cadr sre))))))
-
-(define (sre-searcher? sre)
-  (if (pair? sre)
-      (case (car sre)
-        ((* +) (sre-any? (sre-sequence (cdr sre))))
-        ((seq : submatch) (and (pair? (cdr sre)) (sre-searcher? (cadr sre))))
-        ((or) (every sre-searcher? (cdr sre)))
-        (else #f))
-      (eq? 'bos sre)))
-
-(define (sre-consumer? sre)
-  (if (pair? sre)
-      (case (car sre)
-        ((* +) (sre-any? (sre-sequence (cdr sre))))
-        ((seq : submatch) (and (pair? (cdr sre)) (sre-consumer? (last sre))))
-        ((or) (every sre-consumer? (cdr sre)))
-        (else #f))
-      (eq? 'eos sre)))
-
-(define (sre-has-submatchs? sre)
-  (and (pair? sre)
-       (or (eq? 'submatch (car sre))
-           (any sre-has-submatchs? (cdr sre)))))
-
-(define (sre-count-submatches sre)
-  (let count ((sre sre) (sum 0))
-    (if (pair? sre)
-        (fold count
-              (+ sum (case (car sre)
-                       ((submatch submatch-named) 1)
-                       ((dsm) (+ (cadr sre) (caddr sre)))
-                       (else 0)))
-              (cdr sre))
-        sum)))
-
-(define (sre-length-ranges sre . o)
-  (let ((names (if (pair? o) (car o) (sre-names sre 1 '())))
-        (sublens (make-vector (+ 1 (sre-count-submatches sre)) #f)))
-    (vector-set!
-     sublens
-     0
-     (let lp ((sre sre) (n 1) (lo 0) (hi 0) (return cons))
-       (define (grow i) (return (+ lo i) (and hi (+ hi i))))
-       (cond
-        ((pair? sre)
-         (if (string? (car sre))
-             (grow 1)
-             (case (car sre)
-               ((/ ~ & -)
-                (grow 1))
-               ((posix-string)
-                (lp (string->sre (cadr sre)) n lo hi return))
-               ((seq : w/case w/nocase atomic)
-                (let lp2 ((ls (cdr sre)) (n n) (lo2 0) (hi2 0))
-                  (if (null? ls)
-                      (return (+ lo lo2) (and hi hi2 (+ hi hi2)))
-                      (lp (car ls) n 0 0
-                          (lambda (lo3 hi3)
-                            (lp2 (cdr ls)
-                                 (+ n (sre-count-submatches (car ls)))
-                                 (+ lo2 lo3)
-                                 (and hi2 hi3 (+ hi2 hi3))))))))
-               ((or)
-                (let lp2 ((ls (cdr sre)) (n n) (lo2 #f) (hi2 0))
-                  (if (null? ls)
-                      (return (+ lo lo2) (and hi hi2 (+ hi hi2)))
-                      (lp (car ls) n 0 0
-                          (lambda (lo3 hi3)
-                            (lp2 (cdr ls)
-                                 (+ n (sre-count-submatches (car ls)))
-                                 (if lo2 (min lo2 lo3) lo3)
-                                 (and hi2 hi3 (max hi2 hi3))))))))
-               ((if)
-                (cond
-                 ((or (null? (cdr sre)) (null? (cddr sre)))
-                  (return lo hi))
-                 (else
-                  (let ((n1 (sre-count-submatches (car sre)))
-                        (n2 (sre-count-submatches (cadr sre))))
-                    (lp (if (or (number? (cadr sre)) (symbol? (cadr sre)))
-                            'epsilon
-                            (cadr sre))
-                        n lo hi
-                        (lambda (lo2 hi2)
-                          (lp (caddr sre) (+ n n1) 0 0
-                              (lambda (lo3 hi3)
-                                (lp (if (pair? (cdddr sre))
-                                        (cadddr sre)
-                                        'epsilon)
-                                    (+ n n1 n2) 0 0
-                                    (lambda (lo4 hi4)
-                                      (return (+ lo2 (min lo3 lo4))
-                                              (and hi2 hi3 hi4
-                                                   (+ hi2 (max hi3 hi4))
-                                                   ))))))))))))
-               ((dsm)
-                (lp (sre-sequence (cdddr sre)) (+ n (cadr sre)) lo hi return))
-               ((submatch submatch-named)
-                (lp (sre-sequence
-                     (if (eq? 'submatch (car sre)) (cdr sre) (cddr sre)))
-                    (+ n 1) lo hi
-                    (lambda (lo2 hi2)
-                      (vector-set! sublens n (cons lo2 hi2))
-                      (return lo2 hi2))))
-               ((backref backref-ci)
-                (let ((n (cond
-                          ((number? (cadr sre)) (cadr sre))
-                          ((assq (cadr sre) names) => cdr)
-                          (else (error "unknown backreference" (cadr sre))))))
-                  (cond
-                   ((or (not (integer? n))
-                        (not (< 0 n (vector-length sublens))))
-                    (error "sre-length: invalid backreference" sre))
-                   ((not (vector-ref sublens n))
-                    (error "sre-length: invalid forward backreference" sre))
-                   (else
-                    (let ((lo2 (car (vector-ref sublens n)))
-                          (hi2 (cdr (vector-ref sublens n))))
-                      (return (+ lo lo2) (and hi hi2 (+ hi hi2))))))))
-               ((* *?)
-                (lp (sre-sequence (cdr sre)) n lo hi (lambda (lo hi) #f))
-                (return lo #f))
-               ((** **?)
-                (cond
-                 ((or (and (number? (cadr sre))
-                           (number? (caddr sre))
-                           (> (cadr sre) (caddr sre)))
-                      (and (not (cadr sre)) (caddr sre)))
-                  (return lo hi))
-                 (else
-                  (if (caddr sre)
-                      (lp (sre-sequence (cdddr sre)) n 0 0
-                          (lambda (lo2 hi2)
-                            (return (+ lo (* (cadr sre) lo2))
-                                    (and hi hi2 (+ hi (* (caddr sre) hi2))))))
-                      (lp (sre-sequence (cdddr sre)) n 0 0
-                          (lambda (lo2 hi2)
-                            (return (+ lo (* (cadr sre) lo2)) #f)))))))
-               ((+)
-                (lp (sre-sequence (cdr sre)) n lo hi
-                    (lambda (lo2 hi2)
-                      (return (+ lo lo2) #f))))
-               ((? ??)
-                (lp (sre-sequence (cdr sre)) n lo hi
-                    (lambda (lo2 hi2)
-                      (return lo (and hi hi2 (+ hi hi2))))))
-               ((= =? >= >=?)
-                (lp `(** ,(cadr sre)
-                         ,(if (memq (car sre) '(>= >=?)) #f (cadr sre))
-                         ,@(cddr sre))
-                    n lo hi return))
-               ((look-ahead neg-look-ahead look-behind neg-look-behind)
-                (return lo hi))
-               (else
-                (error "sre-length-ranges: unknown sre operator" sre)))))
-        ((char? sre)
-         (grow 1))
-        ((string? sre)
-         (grow (string-length sre)))
-        ((memq sre '(any nonl))
-         (grow 1))
-        ((memq sre '(epsilon bos eos bol eol bow eow nwb commit))
-         (return lo hi))
-        (else
-         (let ((cell (assq sre sre-named-definitions)))
-           (if cell
-               (lp (cdr cell) n lo hi return)
-               (error "sre-length-ranges: unknown sre" sre)))))))
-    sublens))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; sre manipulation
-
-;; build a (seq ls ...) sre from a list
-(define (sre-sequence ls)
-  (cond
-   ((null? ls) 'epsilon)
-   ((null? (cdr ls)) (car ls))
-   (else (cons 'seq ls))))
-
-;; build a (or ls ...) sre from a list
-(define (sre-alternate ls)
-  (cond
-   ((null? ls) 'epsilon)
-   ((null? (cdr ls)) (car ls))
-   (else (cons 'or ls))))
-
-;; returns an equivalent SRE without any match information
-(define (sre-strip-submatches sre)
-  (if (not (pair? sre))
-      sre
-      (case (car sre)
-        ((submatch) (sre-strip-submatches (sre-sequence (cdr sre))))
-        ((dsm) (sre-strip-submatches (sre-sequence (cdddr sre))))
-        (else (map sre-strip-submatches sre)))))
-
-;; given a char-set list of chars and strings, flattens them into
-;; chars only
-(define (sre-flatten-ranges ls)
-  (let lp ((ls ls) (res '()))
-    (cond
-     ((null? ls)
-      (reverse res))
-     ((string? (car ls))
-      (lp (append (string->list (car ls)) (cdr ls)) res))
-     (else
-      (lp (cdr ls) (cons (car ls) res))))))
-
-(define (sre-names sre n names)
-  (if (not (pair? sre))
-      names
-      (case (car sre)
-        ((submatch)
-         (sre-names (sre-sequence (cdr sre)) (+ n 1) names))
-        ((submatch-named)
-         (sre-names (sre-sequence (cddr sre))
-                    (+ n 1)
-                    (cons (cons (cadr sre) n) names)))
-        ((dsm)
-         (sre-names (sre-sequence (cdddr sre)) (+ n (cadr sre)) names))
-        ((seq : or * + ? *? ?? w/case w/nocase atomic
-              look-ahead look-behind neg-look-ahead neg-look-behind)
-         (sre-sequence-names (cdr sre) n names))
-        ((= >=)
-         (sre-sequence-names (cddr sre) n names))
-        ((** **?)
-         (sre-sequence-names (cdddr sre) n names))
-        (else
-         names))))
-
-(define (sre-sequence-names ls n names)
-  (if (null? ls)
-      names
-      (sre-sequence-names (cdr ls)
-                          (+ n (sre-count-submatches (car ls)))
-                          (sre-names (car ls) n names))))
-
-(define (sre-remove-initial-bos sre)
-  (cond
-   ((pair? sre)
-    (case (car sre)
-      ((seq : submatch * +)
-       (cond
-        ((not (pair? (cdr sre)))
-         sre)
-        ((eq? 'bos (cadr sre))
-         (cons (car sre) (cddr sre)))
-        (else
-         (cons (car sre)
-               (cons (sre-remove-initial-bos (cadr sre)) (cddr sre))))))
-      ((or)
-       (sre-alternate (map sre-remove-initial-bos (cdr sre))))
-      (else
-       sre)))
-   (else
-    sre)))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; matching
-
-(define (irregex-search x str . o)
-  (let ((irx (irregex x)))
-    (let ((start (if (pair? o) (car o) 0))
-          (end   (if (and (pair? o) (pair? (cdr o)))
-		     (cadr o) (string-length str)))
-          (matches (irregex-new-matches irx)))
-      (irregex-match-string-set! matches str)
-      (irregex-search/matches irx str start end matches))))
-
-;; internal routine, can be used in loops to avoid reallocating the
-;; match vector
-(define (irregex-search/matches irx str start end matches)
-  (cond
-   ((irregex-dfa irx)
-    (cond
-     ((flag-set? (irregex-flags irx) ~searcher?)
-      (let ((m-end (dfa-match/longest (irregex-dfa irx) str start end)))
-        (cond
-         (m-end
-          (irregex-match-start-index-set! matches 0 start)
-          (irregex-match-end-index-set! matches 0 m-end)
-          ((irregex-dfa/extract irx) str start m-end matches)
-          matches)
-         (else
-          #f))))
-     (else
-      (let ((first-match
-             (dfa-match/shortest (irregex-dfa/search irx) str start end)))
-        (and
-         first-match
-         (let* ((lo+hi (vector-ref (irregex-lengths irx) 0))
-                (m-start (if (cdr lo+hi)
-                             (max start (- first-match (cdr lo+hi)))
-                             start))
-                (m-limit (- first-match (car lo+hi)))
-                (dfa (irregex-dfa irx)))
-           (let lp ((m-start m-start))
-             (and (<= m-start m-limit)
-                  (let ((m-end (dfa-match/longest dfa str m-start end)))
-                    (cond
-                     (m-end
-                      (irregex-match-start-index-set! matches 0 m-start)
-                      (irregex-match-end-index-set! matches 0 m-end)
-                      ((irregex-dfa/extract irx) str m-start m-end matches)
-                      matches)
-                     (else
-                      (lp (+ m-start 1)))))))))))))
-   (else
-    (let ((matcher (irregex-nfa irx)))
-      (let lp ((start start))
-        (and (<= start end)
-             (let ((i (matcher str start matches (lambda () #f))))
-               (cond
-                (i
-                 (irregex-match-start-index-set! matches 0 start)
-                 (irregex-match-end-index-set! matches 0 i)
-                 matches)
-                (else
-                 (lp (+ start 1)))))))))))
-
-(define (irregex-match irx str)
-  (let* ((irx (irregex irx))
-         (matches (irregex-new-matches irx))
-         (start 0)
-         (end (string-length str)))
-    (irregex-match-string-set! matches str)
-    (cond
-     ((irregex-dfa irx)
-      (let ((m-end (dfa-match/longest (irregex-dfa irx) str start end)))
-        (cond
-         ((equal? m-end end)
-          (irregex-match-start-index-set! matches 0 start)
-          (irregex-match-end-index-set! matches 0 m-end)
-          ((irregex-dfa/extract irx) str start m-end matches)
-          matches)
-         (else
-          #f))))
-     (else
-      (let* ((matcher (irregex-nfa irx))
-             (i (matcher str start matches (lambda () #f))))
-        (cond
-         ((equal? i end)
-          (irregex-match-start-index-set! matches 0 start)
-          (irregex-match-end-index-set! matches 0 i)
-          matches)
-         (else
-          #f)))))))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; DFA matching
-
-;; inline these
-(define (dfa-init-state dfa)
-  (vector-ref dfa 0))
-(define (dfa-next-state dfa node)
-  (vector-ref dfa (cdr node)))
-(define (dfa-final-state? dfa state)
-  (car state))
-
-;; this searches for the first end index for which a match is possible
-(define (dfa-match/shortest dfa str start end)
-  (let lp ((i start) (state (dfa-init-state dfa)))
-    (if (dfa-final-state? dfa state)
-        i
-        (and (< i end)
-             (let* ((ch (string-ref str i))
-                    (next (find (lambda (x)
-                                  (or (eqv? ch (car x))
-                                      (and (pair? (car x))
-                                           (char<=? (caar x) ch)
-                                           (char<=? ch (cdar x)))))
-                                (cdr state))))
-               (and next (lp (+ i 1) (dfa-next-state dfa next))))))))
-
-;; this finds the longest match starting at a given index
-(define (dfa-match/longest dfa str start end)
-  (let lp ((i start)
-           (state (dfa-init-state dfa))
-           (res (and (dfa-final-state? dfa (dfa-init-state dfa)) start)))
-    (if (>= i end)
-        res
-        (let* ((ch (string-ref str i))
-               (cell (find (lambda (x)
-                             (or (eqv? ch (car x))
-                                 (and (pair? (car x))
-                                      (char<=? (caar x) ch)
-                                      (char<=? ch (cdar x)))))
-                           (cdr state))))
-          (if cell
-              (let ((next (dfa-next-state dfa cell)))
-                (lp (+ i 1)
-                    next
-                    (if (dfa-final-state? dfa next) (+ i 1) res)))
-              res)))))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; SRE->NFA compilation
-;;
-;; An NFA state is a numbered node with a list of patter->number
-;; transitions, where pattern is either a character, (lo . hi)
-;; character range, or epsilon (indicating an empty transition).
-;; There may be duplicate characters and overlapping ranges - since
-;; it's an NFA we process it by considering all possible transitions.
-
-(define sre-named-definitions
-  `((any . ,*all-chars*)
-    (nonl . (- ,*all-chars* (,(string #\newline))))
-    (alphabetic . (/ #\a #\z #\A #\Z))
-    (alpha . alphabetic)
-    (alphanumeric . (/ #\a #\z #\A #\Z #\0 #\9))
-    (alphanum . alphanumeric)
-    (alnum . alphanumeric)
-    (lower-case . (/ #\a #\z))
-    (lower . lower-case)
-    (upper-case . (/ #\A #\Z))
-    (upper . upper-case)
-    (numeric . (/ #\0 #\9))
-    (num . numeric)
-    (digit . numeric)
-    (punctuation . (or #\! #\" #\# #\% #\& #\' #\( #\) #\* #\, #\- #\.
-                       #\/ #\: #\; #\? #\@ #\[ #\\ #\] #\_ #\{ #\}))
-    (punct . punctuation)
-    (graphic
-     . (or alphanumeric punctuation #\$ #\+ #\< #\= #\> #\^ #\` #\| #\~))
-    (graph . graphic)
-    (blank . (or #\space ,(integer->char (- (char->integer #\space) 23))))
-    (whitespace . (or blank #\newline))
-    (space . whitespace)
-    (white . whitespace)
-    (printing or graphic whitespace)
-    (print . printing)
-    ;; XXXX we assume a (possibly shifted) ASCII-based ordering
-    (control . (/ ,(integer->char (- (char->integer #\space) 32))
-                  ,(integer->char (- (char->integer #\space) 1))))
-    (cntrl . control)
-    (hex-digit . (or numeric (/ #\a #\f #\A #\F)))
-    (xdigit . hex-digit)
-    (ascii . (/ ,(integer->char (- (char->integer #\space) 32))
-                ,(integer->char (+ (char->integer #\space) 95))))
-    (ascii-nonl . (/ ,(integer->char (- (char->integer #\space) 32))
-                     ,(integer->char (- (char->integer #\newline) 1))
-                     ,(integer->char (+ (char->integer #\newline) 1))
-                     ,(integer->char (+ (char->integer #\space) 95))))
-    (newline . (or (seq ,(integer->char (+ (char->integer #\newline) 3))
-                        #\newline)
-                   (/ #\newline
-                      ,(integer->char (+ (char->integer #\newline) 3)))))
-
-    ;; ... it's really annoying to support scheme48
-    (word . (seq bow (+ (or alphanumeric #\_)) eow))
-    (utf8-tail-char . (/ ,(integer->char (+ (char->integer #\space) #x60))
-                         ,(integer->char (+ (char->integer #\space) #xA1))))
-    (utf8-2-char . (seq (/ ,(integer->char (+ (char->integer #\space) #xA2))
-                           ,(integer->char (+ (char->integer #\space) #xBF)))
-                        utf8-tail-char))
-    (utf8-3-char . (seq (/ ,(integer->char (+ (char->integer #\space) #xC0))
-                           ,(integer->char (+ (char->integer #\space) #xCF)))
-                        utf8-tail-char
-                        utf8-tail-char))
-    (utf8-4-char . (seq (/ ,(integer->char (+ (char->integer #\space) #xD0))
-                           ,(integer->char (+ (char->integer #\space) #xD7)))
-                        utf8-tail-char
-                        utf8-tail-char
-                        utf8-tail-char))
-    (utf8-any . (or ascii utf8-2-char utf8-3-char utf8-4-char))
-    (utf8-nonl . (or ascii-nonl utf8-2-char utf8-3-char utf8-4-char))
-    ))
-
-;; Compile and return the list of NFA states.  The start state will be
-;; at the head of the list, and all remaining states will be in
-;; descending numeric order, with state 0 being the unique accepting
-;; state.
-(define (sre->nfa sre . o)
-  ;; we loop over an implicit sequence list
-  (let lp ((ls (list sre))
-           (n 1)
-           (flags (if (pair? o) (car o) ~none))
-           (next (list (list 0))))
-    (define (new-state-number state)
-      (max n (+ 1 (caar state))))
-    (define (extend-state next . trans)
-      (and next
-           (cons (cons (new-state-number next)
-                       (map (lambda (x) (cons x (caar next))) trans))
-                 next)))
-    (if (null? ls)
-        next
-        (cond
-         ((string? (car ls))
-          ;; process literal strings a char at a time
-          (lp (append (string->list (car ls)) (cdr ls)) n flags next))
-         ((eq? 'epsilon (car ls))
-          ;; chars and epsilons go directly into the transition table
-          (extend-state (lp (cdr ls) n flags next) (car ls)))
-         ((char? (car ls))
-          (let ((alt (char-altcase (car ls))))
-            (if (and (flag-set? flags ~case-insensitive?)
-                     (not (eqv? (car ls) alt)))
-                (extend-state (lp (cdr ls) n flags next) (car ls) alt)
-                (extend-state (lp (cdr ls) n flags next) (car ls)))))
-         ((symbol? (car ls))
-          (let ((cell (assq (car ls) sre-named-definitions)))
-            (and cell (lp (cons (cdr cell) (cdr ls)) n flags next))))
-         ((pair? (car ls))
-          (cond
-           ((string? (caar ls))
-            ;; enumerated character set
-            (lp (cons (sre-alternate (string->list (caar ls))) (cdr ls))
-                n
-                flags
-                next))
-           (else
-            (case (caar ls)
-              ((seq :)
-               ;; for an explicit sequence, just append to the list
-               (lp (append (cdar ls) (cdr ls)) n flags next))
-              ((w/case w/nocase w/utf8 w/noutf8)
-               (let* ((next (lp (cdr ls) n flags next))
-                      (flags ((if (memq (caar ls) '(w/case w/utf8))
-                                  flag-clear
-                                  flag-join)
-                              flags
-                              (if (memq (caar ls) '(w/case w/nocase))
-                                  ~case-insensitive?
-                                  ~utf8?))))
-                 (and next (lp (cdar ls) (new-state-number next) flags next))))
-              ((/ - & ~) 
-               (let ((ranges (sre->cset (car ls)
-                                        (flag-set? flags ~case-insensitive?))))
-                 (case (length ranges)
-                   ((1)
-                    (extend-state (lp (cdr ls) n flags next) (car ranges)))
-                   (else
-                    (let ((next (lp (cdr ls) n flags next)))
-                      (and
-                       next
-                       (lp (list (sre-alternate
-                                  (map (lambda (x) (if (pair? x)
-                                                  (list '/ (car x) (cdr x))
-                                                  x))
-                                       ranges)))
-                           (new-state-number next)
-                           (flag-clear flags ~case-insensitive?)
-                           next)))))))
-              ((or)
-               (let* ((next (lp (cdr ls) n flags next))
-                      (b (and next
-                              (lp (list (sre-alternate (cddar ls)))
-                                  (new-state-number next)
-                                  flags
-                                  next)))
-                      (a (and b (lp (list (cadar ls))
-                                    (new-state-number b)
-                                    flags
-                                    next))))
-                 ;; compile both branches and insert epsilon
-                 ;; transitions to either
-                 (and a
-                      `((,(new-state-number a)
-                         (epsilon . ,(caar a))
-                         (epsilon . ,(caar b)))
-                        ,@(take-up-to a next)
-                        ,@b))))
-              ((?)
-               (let ((next (lp (cdr ls) n flags next)))
-                 ;; insert an epsilon transition directly to next
-                 (and
-                  next
-                  (let ((a (lp (cdar ls) (new-state-number next) flags next)))
-                    (cond
-                     (a
-                      (set-cdr! (car a) `((epsilon . ,(caar next)) ,@(cdar a)))
-                      a)
-                     (else
-                      #f))))))
-              ((+ *)
-               (let ((next (lp (cdr ls) n flags next)))
-                 (and
-                  next
-                  (let* ((new (lp '(epsilon)
-                                  (new-state-number next)
-                                  flags
-                                  next))
-                         (a (lp (cdar ls) (new-state-number new) flags new)))
-                    (and
-                     a
-                     (begin
-                       ;; for *, insert an epsilon transition as in ? above
-                       (if (eq? '* (caar ls))
-                           (set-cdr! (car a)
-                                     `((epsilon . ,(caar new)) ,@(cdar a))))
-                       ;; for both, insert a loop back to self
-                       (set-cdr! (car new)
-                                 `((epsilon . ,(caar a)) ,@(cdar new)))
-                       a))))))
-              ((submatch submatch-named)
-               ;; ignore submatches altogether
-               (lp (cons (sre-sequence (cdar ls)) (cdr ls)) n flags next))
-              (else
-               #f)))))
-         (else
-          #f)))))
-
-;; We don't really want to use this, we use the closure compilation
-;; below instead, but this is included for reference and testing the
-;; sre->nfa conversion.
-
-;; (define (nfa-match nfa str)
-;;   (let lp ((ls (string->list str)) (state (car nfa)) (epsilons '()))
-;;     (if (null? ls)
-;;         (zero? (car state))
-;;         (any (lambda (m)
-;;                (if (eq? 'epsilon (car m))
-;;                    (and (not (memv (cdr m) epsilons))
-;;                         (lp ls (assv (cdr m) nfa) (cons (cdr m) epsilons)))
-;;                    (and (or (eqv? (car m) (car ls))
-;;                             (and (pair? (car m))
-;;                                  (char<=? (caar m) (car ls))
-;;                                  (char<=? (car ls) (cdar m))))
-;;                         (lp (cdr ls) (assv (cdr m) nfa) '()))))
-;;              (cdr state)))))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; NFA->DFA compilation
-;;
-;; During processing, the DFA is a list of the form:
-;;
-;;   ((NFA-states ...) accepting-state? transitions ...)
-;;
-;; where the transitions are as in the NFA, except there are no
-;; epsilons, duplicate characters or overlapping char-set ranges, and
-;; the states moved to are closures (sets of NFA states).  Multiple
-;; DFA states may be accepting states.
-
-(define (nfa->dfa nfa . o)
-  (let ((max-states (and (pair? o) (car o))))
-    (let lp ((ls (list (nfa-closure nfa (list (caar nfa)))))
-             (i 0)
-             (res '()))
-      (cond
-       ((null? ls)
-        (dfa-renumber (reverse res)))
-       ((assoc (car ls) res)
-        (lp (cdr ls) i res))
-       (else
-        (let* ((states (car ls))
-               (trans (nfa-state-transitions nfa states))
-               (accept? (and (memv 0 states) #t)))
-          (and (or (not max-states) (< (+ i 1) max-states))
-               (lp (append (map cdr trans) (cdr ls))
-                   (+ i 1)
-                   `((,states ,accept? ,@trans) ,@res)))))))))
-
-;; When the conversion is complete we renumber the DFA sets-of-states
-;; in order and convert the result to a vector for fast lookup.
-(define (dfa-renumber dfa)
-  (let ((states (map cons (map car dfa) (zero-to (length dfa)))))
-    (define (renumber state)
-      (cdr (assoc state states)))
-    (list->vector
-     (map
-      (lambda (node)
-        (cons (cadr node)
-              (map (lambda (x) (cons (car x) (renumber (cdr x))))
-                   (cddr node)))) 
-      dfa))))
-
-;; Extract all distinct characters or ranges and the potential states
-;; they can transition to from a given set of states.  Any ranges that
-;; would overlap with distinct characters are split accordingly.
-(define (nfa-state-transitions nfa states)
-  (let lp ((trans '())   ;; list of (char . state) or ((char . char) . state)
-           (ls states)   ;; list of integers (remaining state numbers)
-           (res '()))    ;; (char state ...) or ((char . char) state ...)
-    (cond
-     ((null? trans)
-      (if (null? ls)
-          (map (lambda (x) (cons (car x) (nfa-closure nfa (cdr x))))
-               res)
-          (let ((node (assv (car ls) nfa)))
-            (lp (if node (cdr node) '()) (cdr ls) res))))
-     ((eq? 'epsilon (caar trans))
-      (lp (cdr trans) ls res))
-     (else
-      (lp (cdr trans) ls (nfa-join-transitions! res (car trans)))))))
-
-(define (nfa-join-transitions! existing new)
-  (define (join ls elt state)
-    (if (not elt)
-        ls
-        (nfa-join-transitions! ls (cons elt state))))
-  (cond
-   ((char? (car new))
-    (let ((ch (car new)))
-      (let lp ((ls existing) (res '()))
-        (cond
-         ((null? ls)
-          ;; done, just cons this on to the original list
-          (cons (list ch (cdr new)) existing))
-         ((eqv? ch (caar ls))
-          ;; add a new state to an existing char
-          (set-cdr! (car ls) (insert-sorted (cdr new) (cdar ls)))
-          existing)
-         ((and (pair? (caar ls))
-               (char<=? (caaar ls) ch)
-               (char<=? ch (cdaar ls)))
-          ;; split a range
-          (apply
-           (lambda (left right)
-             (cons (cons ch (insert-sorted (cdr new) (cdar ls)))
-                   (append (if left (list (cons left (cdar ls))) '())
-                           (if right (list (cons right (cdar ls))) '())
-                           res
-                           (cdr ls))))
-           (split-char-range (caar ls) (car new))))
-         (else
-          ;; keep looking
-          (lp (cdr ls) (cons (car ls) res)))))))
-   (else
-    (let ((lo (caar new))
-          (hi (cdar new)))
-      (let lp ((ls existing) (res '()))
-        (cond
-         ((null? ls)
-          (cons (list (car new) (cdr new)) existing))
-         ((and (char? (caar ls)) (char<=? lo (caar ls)) (char<=? (caar ls) hi))
-          ;; range enclosing a character
-          (apply
-           (lambda (left right)
-             (set-cdr! (car ls) (insert-sorted (cdr new) (cdar ls)))
-             (join (join existing left (cdr new)) right (cdr new)))
-           (split-char-range (car new) (caar ls))))
-         ((and (pair? (caar ls))
-               (or (and (char<=? (caaar ls) hi) (char<=? lo (cdaar ls)))
-                   (and (char<=? hi (caaar ls)) (char<=? (cdaar ls) lo))))
-          ;; overlapping ranges
-          (apply
-           (lambda (left1 left2 same right1 right2)
-             (let ((old-states (cdar ls)))
-               (set-car! (car ls) same)
-               (set-cdr! (car ls) (insert-sorted (cdr new) old-states))
-               (let* ((res (if right1
-                               (cons (cons right1 old-states) existing)
-                               existing))
-                      (res (if right2 (cons (cons right2 old-states) res) res)))
-                 (join (join res left1 (cdr new)) left2 (cdr new)))))
-           (intersect-char-ranges (car new) (caar ls))))
-         (else
-          (lp (cdr ls) (cons (car ls) res)))))))))
-
-(define (char-range c1 c2)
-  (if (eqv? c1 c2) c1 (cons c1 c2)))
-
-;; assumes ch is included in the range
-(define (split-char-range range ch)
-  (list
-   (and (not (eqv? ch (car range)))
-        (char-range (car range) (integer->char (- (char->integer ch) 1))))
-   (and (not (eqv? ch (cdr range)))
-        (char-range (integer->char (+ (char->integer ch) 1)) (cdr range)))))
-
-;; returns (possibly #f) char ranges:
-;;    a-only-1  a-only-2  a-and-b  b-only-1  b-only-2
-(define (intersect-char-ranges a b)
-  (if (char>? (car a) (car b))
-      (reverse (intersect-char-ranges b a))
-      (let ((a-lo (car a))
-            (a-hi (cdr a))
-            (b-lo (car b))
-            (b-hi (cdr b)))
-        (list
-         (and (char<? a-lo b-lo)
-              (char-range a-lo (integer->char (- (char->integer b-lo) 1))))
-         (and (char>? a-hi b-hi)
-              (char-range (integer->char (+ (char->integer b-hi) 1)) a-hi))
-         (char-range b-lo (if (char<? b-hi a-hi) b-hi a-hi))
-         #f
-         (and (char>? b-hi a-hi)
-              (char-range (integer->char (+ (char->integer a-hi) 1)) b-hi))))))
-
-;; The `closure' of a list of NFA states - all states that can be
-;; reached from any of them using any number of epsilon transitions.
-(define (nfa-closure nfa states)
-  (let lp ((ls states)
-           (res '()))
-    (cond
-     ((null? ls)
-      res)
-     ((memv (car ls) res)
-      (lp (cdr ls) res))
-     (else
-      (lp (append (map cdr
-                       (filter (lambda (trans) (eq? 'epsilon (car trans)))
-                               (cdr (assv (car ls) nfa))))
-                  (cdr ls))
-          (insert-sorted (car ls) res))))))
-
-;; insert an integer uniquely into a sorted list
-(define (insert-sorted n ls)
-  (cond
-   ((null? ls)
-    (cons n '()))
-   ((<= n (car ls))
-    (if (= n (car ls))
-        ls
-        (cons n ls)))
-   (else
-    (cons (car ls) (insert-sorted n (cdr ls))))))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; DFAs don't give us match information, so once we match and
-;; determine the start and end, we need to recursively break the
-;; problem into smaller DFAs to get each submatch.
-;;
-;; See http://compilers.iecc.com/comparch/article/07-10-026
-
-(define (sre-match-extractor sre)
-  (let lp ((sre sre) (n 1) (submatch-deps? #f))
-    (cond
-     ((not (sre-has-submatchs? sre))
-      (if (not submatch-deps?)
-          (lambda (str i j matches) j)
-          (let ((dfa (nfa->dfa (sre->nfa sre))))
-            (lambda (str i j matches)
-              (dfa-match/longest dfa str i j)))))
-     ((pair? sre)
-      (case (car sre)
-        ((: seq)
-         (let* ((right (sre-sequence (cddr sre)))
-                (match-left (lp (cadr sre) n #t))
-                (match-right
-                 (lp right (+ n (sre-count-submatches (cadr sre))) #t)))
-           (lambda (str i j matches)
-             (let lp ((k j) (best #f))
-               (if (< k i)
-                   best
-                   (let* ((middle (match-left str i k matches))
-                          (end (and middle
-                                    (eqv? middle k)
-                                    (match-right str middle j matches))))
-                     (if (eqv? end j)
-                         end
-                         (lp (- k 1)
-                             (if (or (not best) (and end (> end best)))
-                                 end
-                                 best)))))))))
-        ((or)
-         (let* ((rest (sre-alternate (cddr sre)))
-                (match-first
-                 (lp (cadr sre) n #t))
-                (match-rest
-                 (lp rest
-                     (+ n (sre-count-submatches (cadr sre)))
-                     submatch-deps?)))
-           (lambda (str i j matches)
-             (let ((k (match-first str i j matches)))
-               (if (eqv? k j)
-                   k
-                   (match-rest str i j matches))))))
-        ((* +)
-         (letrec ((match-once
-                   (lp (sre-sequence (cdr sre)) n #t))
-                  (match-all
-                   (lambda (str i j matches)
-                     (let ((k (match-once str i j matches)))
-                       (if (and k (< i k))
-                           (match-all str k j matches)
-                           i)))))
-           (if (eq? '* (car sre))
-               match-all
-               (lambda (str i j matches)
-                 (let ((k (match-once str i j matches)))
-                   (and k
-                        (match-all str k j matches)))))))
-        ((?)
-         (let ((match-once (lp (sre-sequence (cdr sre)) n #t)))
-           (lambda (str i j matches)
-             (let ((k (match-once str i j matches)))
-               (or k i)))))
-        ((submatch)
-         (let ((match-one
-                (lp (sre-sequence (cdr sre)) (+ n 1) #t)))
-           (lambda (str i j matches)
-             (let ((res (match-one str i j matches)))
-               (cond
-                ((number? res)
-                 (irregex-match-start-index-set! matches n i)
-                 (irregex-match-end-index-set! matches n res)))
-               res))))
-        (else
-         (error "unknown regexp operator" (car sre)))))
-     (else
-      (error "unknown regexp" sre)))))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; closure compilation - we use this for non-regular expressions
-;; instead of an interpreted NFA matcher
-
-(define (sre->procedure sre . o)
-  (define names
-    (if (and (pair? o) (pair? (cdr o))) (cadr o) (sre-names sre 1 '())))
-  (let lp ((sre sre)
-           (n 1)
-           (flags (if (pair? o) (car o) ~none))
-           (next (lambda (str i matches fail) i)))
-    (define (rec sre) (lp sre n flags next))
-    (cond
-     ((pair? sre)
-      (if (string? (car sre))
-          (sre-cset->procedure
-           (sre->cset (car sre) (flag-set? flags ~case-insensitive?))
-           next)
-          (case (car sre)
-            ((~ - & /)
-             (sre-cset->procedure
-              (sre->cset sre (flag-set? flags ~case-insensitive?))
-              next))
-            ((or)
-             (case (length (cdr sre))
-               ((0) (lambda (str i matches fail) (fail)))
-               ((1) (rec (cadr sre)))
-               (else
-                (let* ((first (rec (cadr sre)))
-                       (rest (lp (sre-alternate (cddr sre))
-                                 (+ n (sre-count-submatches (cadr sre)))
-                                 flags
-                                 next)))
-                  (lambda (str i matches fail)
-                    (first str i matches (lambda () (rest str i matches fail))))))))
-            ((w/case)
-             (lp (sre-sequence (cdr sre))
-                 n
-                 (flag-clear flags ~case-insensitive?)
-                 next))
-            ((w/nocase)
-             (lp (sre-sequence (cdr sre))
-                 n
-                 (flag-join flags ~case-insensitive?)
-                 next))
-            ((w/utf8)
-             (lp (sre-sequence (cdr sre)) n (flag-join flags ~utf8?) next))
-            ((w/noutf8)
-             (lp (sre-sequence (cdr sre)) n (flag-clear flags ~utf8?) next))
-            ((seq :)
-             (case (length (cdr sre))
-               ((0) next)
-               ((1) (rec (cadr sre)))
-               (else
-                (let ((rest (lp (sre-sequence (cddr sre))
-                                (+ n (sre-count-submatches (cadr sre)))
-                                flags
-                                next)))
-                  (lp (cadr sre) n flags rest)))))
-            ((?)
-             (let ((body (rec (sre-sequence (cdr sre)))))
-               (lambda (str i matches fail)
-                 (body str i matches (lambda () (next str i matches fail))))))
-            ((??)
-             (let ((body (rec (sre-sequence (cdr sre)))))
-               (lambda (str i matches fail)
-                 (next str i matches (lambda () (body str i matches fail))))))
-            ((*)
-             (cond
-              ((sre-empty? (sre-sequence (cdr sre)))
-               (error "invalid sre: empty *" sre))
-              (else
-               (letrec ((body
-                         (lp (sre-sequence (cdr sre))
-                             n
-                             flags
-                             (lambda (str i matches fail)
-                               (body str
-                                     i
-                                     matches
-                                     (lambda () (next str i matches fail)))))))
-                 (lambda (str i matches fail)
-                   (body str i matches (lambda () (next str i matches fail))))))))
-            ((*?)
-             (cond
-              ((sre-empty? (sre-sequence (cdr sre)))
-               (error "invalid sre: empty *?" sre))
-              (else
-               (letrec ((body
-                         (lp (sre-sequence (cdr sre))
-                             n
-                             flags
-                             (lambda (str i matches fail)
-                               (next str
-                                     i
-                                     matches
-                                     (lambda () (body str i matches fail)))))))
-                 (lambda (str i matches fail)
-                   (next str i matches (lambda () (body str i matches fail))))))))
-            ((+)
-             (lp (sre-sequence (cdr sre))
-                 n
-                 flags
-                 (rec (list '* (sre-sequence (cdr sre))))))
-            ((=)
-             (rec `(** ,(cadr sre) ,(cadr sre) ,@(cddr sre))))
-            ((>=)
-             (rec `(** ,(cadr sre) #f ,@(cddr sre))))
-            ((** **?)
-             (cond
-              ((or (and (number? (cadr sre))
-                        (number? (caddr sre))
-                        (> (cadr sre) (caddr sre)))
-                   (and (not (cadr sre)) (caddr sre)))
-               (lambda (str i matches fail) (fail)))
-              (else
-               (let* ((from (cadr sre))
-                      (to (caddr sre))
-                      (? (if (eq? '** (car sre)) '? '??))
-                      (* (if (eq? '** (car sre)) '* '*?))
-                      (sre (sre-sequence (cdddr sre)))
-                      (x-sre (sre-strip-submatches sre))
-                      (next (if to
-                                (if (= from to)
-                                    next
-                                    (fold (lambda (x next)
-                                            (lp `(,? ,sre) n flags next))
-                                          next
-                                          (zero-to (- to from))))
-                                (rec `(,* ,sre)))))
-                 (if (zero? from)
-                     next
-                     (lp `(seq ,@(map (lambda (x) x-sre) (zero-to (- from 1)))
-                               ,sre)
-                         n
-                         flags
-                         next))))))
-            ((word)
-             (rec `(seq bow ,@(cdr sre) eow)))
-            ((word+)
-             (rec `(seq bow (+ (& (or alphanumeric "_")
-                                  (or ,@(cdr sre)))) eow)))
-            ((posix-string)
-             (rec (string->sre (cadr sre))))
-            ((look-ahead)
-             (let ((check
-                    (lp (sre-sequence (cdr sre))
-                        n
-                        flags
-                        (lambda (str i matches fail) i))))
-               (lambda (str i matches fail)
-                 (if (check str i matches (lambda () #f))
-                     (next str i matches fail)
-                     (fail)))))
-            ((neg-look-ahead)
-             (let ((check
-                    (lp (sre-sequence (cdr sre))
-                        n
-                        flags
-                        (lambda (str i matches fail) i))))
-               (lambda (str i matches fail)
-                 (if (check str i matches (lambda () #f))
-                     (fail)
-                     (next str i matches fail)))))
-            ((look-behind)
-             (let ((check
-                    (lp (sre-sequence (cons '(* any) (cdr sre)))
-                        n
-                        flags
-                        (lambda (str i matches fail) i))))
-               (lambda (str i matches fail)
-                 (if (eqv? i (check (substring str 0 i) 0 matches (lambda () #f)))
-                     (next str i matches fail)
-                     (fail)))))
-            ((neg-look-behind)
-             (let ((check
-                    (lp (sre-sequence (cons '(* any) (cdr sre)))
-                        n
-                        flags
-                        (lambda (str i matches fail) i))))
-               (lambda (str i matches fail)
-                 (if (eqv? i (check (substring str 0 i) 0 matches (lambda () #f)))
-                     (fail)
-                     (next str i matches fail)))))
-            ((atomic)
-             (let ((once
-                    (lp (sre-sequence (cdr sre))
-                        n
-                        flags
-                        (lambda (str i matches fail) i))))
-               (lambda (str i matches fail)
-                 (let ((j (once str i matches (lambda () #f))))
-                   (if j
-                       (next str j matches fail)
-                       (fail))))))
-            ((if)
-             (let* ((test-submatches (sre-count-submatches (cadr sre)))
-                    (pass (lp (caddr sre) flags (+ n test-submatches) next))
-                    (fail (if (pair? (cdddr sre))
-                              (lp (cadddr sre)
-                                  (+ n test-submatches
-                                     (sre-count-submatches (caddr sre)))
-                                  flags
-                                  next)
-                              (lambda (str i matches fail) (fail)))))
-               (cond
-                ((or (number? (cadr sre)) (symbol? (cadr sre)))
-                 (let ((index
-                        (if (symbol? (cadr sre))
-                            (cond
-                             ((assq (cadr sre) names) => cdr)
-                             (else
-                              (error "unknown named backref in SRE IF" sre)))
-                            (cadr sre))))
-                   (lambda (str i matches fail2)
-                     (if (irregex-match-end-index matches index)
-                         (pass str i matches fail2)
-                         (fail str i matches fail2)))))
-                (else
-                 (let ((test (lp (cadr sre) n flags pass)))
-                   (lambda (str i matches fail2)
-                     (test str i matches (lambda () (fail str i matches fail2)))
-                     ))))))
-            ((backref backref-ci)
-             (let ((n (cond ((number? (cadr sre)) (cadr sre))
-                            ((assq (cadr sre) names) => cdr)
-                            (else (error "unknown backreference" (cadr sre)))))
-                   (compare (if (or (eq? (car sre) 'backref-ci)
-                                    (flag-set? flags ~case-insensitive?))
-                                string-ci=?
-                                string=?)))
-               (lambda (str i matches fail)
-                 (let ((s (irregex-match-substring matches n)))
-                   (if (not s)
-                       (fail)
-                       (let ((j (+ i (string-length s))))
-                         (if (and (<= j (string-length str))
-                                  (compare s (substring str i j)))
-                             (next str j matches fail)
-                             (fail))))))))
-            ((dsm)
-             (lp (sre-sequence (cdddr sre)) (+ n (cadr sre)) flags next))
-            ((submatch)
-             (let ((body
-                    (lp (sre-sequence (cdr sre))
-                        (+ n 1)
-                        flags
-                        (lambda (str i matches fail)
-                          (let ((old (irregex-match-end-index matches n)))
-                            (irregex-match-end-index-set! matches n i)
-                            (next str i matches
-                                  (lambda ()
-                                    (irregex-match-end-index-set! matches n old)
-                                    (fail))))))))
-               (lambda (str i matches fail)
-                 (let ((old (irregex-match-start-index matches n)))
-                   (irregex-match-start-index-set! matches n i)
-                   (body str i matches
-                         (lambda ()
-                           (irregex-match-start-index-set! matches n old)
-                           (fail)))))))
-            ((submatch-named)
-             (rec `(submatch ,@(cddr sre))))
-            (else
-             (error "unknown regexp operator" sre)))))
-     ((symbol? sre)
-      (case sre
-        ((any)
-         (lambda (str i matches fail)
-           (if (< i (string-length str))
-               (next str (+ i 1) matches fail)
-               (fail))))
-        ((nonl)
-         (lambda (str i matches fail)
-           (if (and (< i (string-length str))
-                    (not (eqv? #\newline (string-ref str i))))
-               (next str (+ i 1) matches fail)
-               (fail))))
-        ((bos)
-         (lambda (str i matches fail)
-           (if (zero? i) (next str i matches fail) (fail))))
-        ((bol)
-         (lambda (str i matches fail)
-           (if (or (zero? i) (eqv? #\newline (string-ref str (- i 1))))
-               (next str i matches fail)
-               (fail))))
-        ((bow)
-         (lambda (str i matches fail)
-           (if (and (or (zero? i)
-                        (not (char-alphanumeric? (string-ref str (- i 1)))))
-                    (< i (string-length str))
-                    (char-alphanumeric? (string-ref str i)))
-               (next str i matches fail)
-               (fail))))
-        ((eos)
-         (lambda (str i matches fail)
-           (if (>= i (string-length str)) (next str i matches fail) (fail))))
-        ((eol)
-         (lambda (str i matches fail)
-           (if (or (>= i (string-length str))
-                   (eqv? #\newline (string-ref str i)))
-               (next str i matches fail)
-               (fail))))
-        ((eow)
-         (lambda (str i matches fail)
-           (if (and (or (>= i (string-length str))
-                        (not (char-alphanumeric? (string-ref str i))))
-                    (> i 0)
-                    (char-alphanumeric? (string-ref str (- i 1))))
-               (next str i matches fail)
-               (fail))))
-        ((nwb)  ;; non-word-boundary
-         (lambda (str i matches fail)
-           (if (and (not (zero? i))
-                    (< i (string-length str))
-                    (if (char-alphanumeric? (string-ref str (- i 1)))
-                        (char-alphanumeric? (string-ref str i))
-                        (not (char-alphanumeric? (string-ref str i)))))
-               (next str i matches fail)
-               (fail))))
-        ((epsilon)
-         next)
-        (else
-         (let ((cell (assq sre sre-named-definitions)))
-           (if cell
-               (rec (cdr cell))
-               (error "unknown regexp" sre))))))
-     ((char? sre)
-      (if (flag-set? flags ~case-insensitive?)
-          (lambda (str i matches fail)
-            (if (and (< i (string-length str))
-                     (char-ci=? sre (string-ref str i)))
-                (next str (+ i 1) matches fail)
-                (fail)))
-          (lambda (str i matches fail)
-            (if (and (< i (string-length str))
-                     (eqv? sre (string-ref str i)))
-                (next str (+ i 1) matches fail)
-                (fail)))))
-     ((string? sre)
-      (rec (sre-sequence (string->list sre))))
-     (else
-      (error "unknown regexp" sre)))))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Simple character sets as lists of ranges, as used in the NFA/DFA
-;; compilation.  This is not especially efficient, but is portable and
-;; scalable for any range of character sets.
-
-(define (sre-cset->procedure cset next)
-  (lambda (str i matches fail)
-    (if (and (< i (string-length str))
-             (cset-contains? cset (string-ref str i)))
-        (next str (+ i 1) matches fail)
-        (fail))))
-
-(define (plist->alist ls)
-  (let lp ((ls ls) (res '()))
-    (if (null? ls)
-        (reverse res)
-        (lp (cddr ls) (cons (cons (car ls) (cadr ls)) res)))))
-
-(define (alist->plist ls)
-  (let lp ((ls ls) (res '()))
-    (if (null? ls)
-        (reverse res)
-        (lp (cdr ls) (cons (cdar ls) (cons (caar ls) res))))))
-
-(define (sre->cset sre . o)
-  (let lp ((sre sre) (ci? (and (pair? o) (car o))))
-    (define (rec sre) (lp sre ci?))
-    (cond
-     ((pair? sre)
-      (if (string? (car sre))
-          (if ci?
-              (cset-case-insensitive (string->list (car sre)))
-              (string->list (car sre)))
-          (case (car sre)
-            ((~)
-             (cset-complement
-              (fold cset-union (rec (cadr sre)) (map rec (cddr sre)))))
-            ((&)
-             (fold cset-intersection (rec (cadr sre)) (map rec (cddr sre))))
-            ((-)
-             (fold (lambda (x res) (cset-difference res x))
-                   (rec (cadr sre))
-                   (map rec (cddr sre))))
-            ((/)
-             (let ((res (plist->alist (sre-flatten-ranges (cdr sre)))))
-               (if ci?
-                   (cset-case-insensitive res)
-                   res)))
-            ((or)
-             (fold cset-union (rec (cadr sre)) (map rec (cddr sre))))
-            ((w/case)
-             (lp (sre-alternate (cdr sre)) #f))
-            ((w/nocase)
-             (lp (sre-alternate (cdr sre)) #t))
-            (else
-             (error "not a valid sre char-set operator" sre)))))
-     ((char? sre) (rec (list (string sre))))
-     ((string? sre) (rec (list sre)))
-     (else
-      (let ((cell (assq sre sre-named-definitions)))
-        (if cell
-            (rec (cdr cell))
-            (error "not a valid sre char-set" sre)))))))
-
-;;;; another debugging utility
-;; (define (cset->sre cset)
-;;   (let lp ((ls cset) (chars '()) (ranges '()))
-;;     (cond
-;;      ((null? ls)
-;;       (sre-alternate
-;;        (append
-;;         (if (pair? chars) (list (list (list->string chars))) '())
-;;         (if (pair? ranges) (list (cons '/ (alist->plist ranges))) '()))))
-;;      ((char? (car ls)) (lp (cdr ls) (cons (car ls) chars) ranges))
-;;      (else (lp (cdr ls) chars (cons (car ls) ranges))))))
-
-(define (cset-contains? cset ch)
-  (find (lambda (x)
-          (or (eqv? x ch)
-              (and (pair? x) (char<=? (car x) ch) (char<=? ch (cdr x)))))
-        cset))
-
-(define (cset-range x)
-  (if (char? x) (cons x x) x))
-
-(define (char-ranges-overlap? a b)
-  (if (pair? a)
-      (if (pair? b)
-          (or (and (char<=? (car a) (cdr b)) (char<=? (car b) (cdr a)))
-              (and (char<=? (cdr b) (car a)) (char<=? (cdr a) (car b))))
-          (and (char<=? (car a) b) (char<=? b (cdr a))))
-      (if (pair? b)
-          (char-ranges-overlap? b a)
-          (eqv? a b))))
-
-(define (char-ranges-union a b)
-  (cons (if (char<=? (car a) (car b)) (car a) (car b))
-        (if (char>=? (cdr a) (cdr b)) (cdr a) (cdr b))))
-
-(define (cset-union a b)
-  (cond ((null? b) a)
-        ((find-tail (lambda (x) (char-ranges-overlap? x (car b))) a)
-         => (lambda (ls)
-              (cset-union
-               (cset-union (append (take-up-to a ls) (cdr ls))
-                           (list (char-ranges-union (cset-range (car ls))
-                                                    (cset-range (car b)))))
-               (cdr b))))
-        (else (cset-union (cons (car b) a) (cdr b)))))
-
-(define (cset-difference a b)
-  (cond ((null? b) a)
-        ((not (car b)) (cset-difference a (cdr b)))
-        ((find-tail (lambda (x) (char-ranges-overlap? x (car b))) a)
-         => (lambda (ls)
-              (apply
-               (lambda (left1 left2 same right1 right2)
-                 (let* ((a (append (take-up-to a ls) (cdr ls)))
-                        (a (if left1 (cons left1 a) a))
-                        (a (if left2 (cons left2 a) a))
-                        (b (if right1 (cset-union b (list right1)) b))
-                        (b (if right2 (cset-union b (list right2)) b)))
-                   (cset-difference a b)))
-               (intersect-char-ranges (cset-range (car ls))
-                                      (cset-range (car b))))))
-        (else (cset-difference a (cdr b)))))
-
-(define (cset-intersection a b)
-  (let intersect ((a a) (b b) (res '()))
-    (cond ((null? b) res)
-          ((find-tail (lambda (x) (char-ranges-overlap? x (car b))) a)
-           => (lambda (ls)
-                (apply
-                 (lambda (left1 left2 same right1 right2)
-                   (let* ((a (append (take-up-to a ls) (cdr ls)))
-                          (a (if left1 (cons left1 a) a))
-                          (a (if left2 (cons left2 a) a))
-                          (b (if right1 (cset-union b (list right1)) b))
-                          (b (if right2 (cset-union b (list right2)) b)))
-                     (intersect a b (cset-union res (list same)))))
-                 (intersect-char-ranges (cset-range (car ls))
-                                        (cset-range (car b))))))
-          (else (intersect a (cdr b) res)))))
-
-(define (cset-complement a)
-  (cset-difference (sre->cset *all-chars*) a))
-
-(define (cset-case-insensitive a)
-  (let lp ((ls a) (res '()))
-    (cond ((null? ls) (reverse res))
-          ((and (char? (car ls)) (char-alphabetic? (car ls)))
-           (let ((c2 (char-altcase (car ls)))
-                 (res (cons (car ls) res)))
-             (lp (cdr ls) (if (cset-contains? res c2) res (cons c2 res)))))
-          ((and (pair? (car ls))
-                (char-alphabetic? (caar ls))
-                (char-alphabetic? (cdar ls)))
-           (lp (cdr ls)
-               (cset-union (cset-union res (list (car ls)))
-                           (list (cons (char-altcase (caar ls))
-                                       (char-altcase (cdar ls)))))))
-          (else (lp (cdr ls) (cset-union res (list (car ls))))))))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; match and replace utilities
-
-(define (irregex-fold irx kons knil str . o)
-  (let* ((irx (irregex irx))
-         (matches (irregex-new-matches irx))
-         (finish (if (pair? o) (car o) (lambda (i acc) acc)))
-         (start (if (and (pair? o) (pair? (cdr o))) (cadr o) 0))
-         (end (if (and (pair? o) (pair? (cdr o)) (pair? (cddr o)))
-                  (caddr o)
-                  (string-length str))))
-    (irregex-match-string-set! matches str)
-    (let lp ((i start) (acc knil))
-      (if (>= i end)
-          (finish i acc)
-          (let ((m (irregex-search/matches irx str i end matches)))
-            (if (not m)
-                (finish i acc)
-                (let* ((end (irregex-match-end m 0))
-                       (acc (kons i m acc)))
-                  (irregex-reset-matches! matches)
-                  (lp end acc))))))))
-
-(define (irregex-replace irx str . o)
-  (let ((m (irregex-search (irregex irx) str)))
-    (and
-     m
-     (string-cat-reverse
-      (cons (substring str (irregex-match-end m 0) (string-length str))
-            (append (irregex-apply-match m o)
-                    (list (substring str 0 (irregex-match-start m 0)))))))))
-
-(define (irregex-replace/all irx str . o)
-  (irregex-fold
-   irx
-   (lambda (i m acc)
-     (let ((m-start (irregex-match-start m 0)))
-       (append (irregex-apply-match m o)
-               (if (= i m-start)
-                   acc
-                   (cons (substring str i m-start) acc)))))
-   '()
-   str
-   (lambda (i acc)
-     (let ((end (string-length str)))
-       (string-cat-reverse (if (= i end)
-                               acc
-                               (cons (substring str i end) acc)))))))
-
-(define (irregex-apply-match m ls)
-  (let lp ((ls ls) (res '()))
-    (if (null? ls)
-        res
-        (cond
-         ((integer? (car ls))
-          (lp (cdr ls)
-              (cons (or (irregex-match-substring m (car ls)) "") res)))
-         ((procedure? (car ls))
-          (lp (cdr ls) (cons ((car ls) m) res)))
-         ((symbol? (car ls))
-          (case (car ls)
-            ((pre)
-             (lp (cdr ls)
-                 (cons (substring (irregex-match-string m)
-                                  0
-                                  (irregex-match-start m 0))
-                       res)))
-            ((post)
-             (lp (cdr ls)
-                 (cons (substring (irregex-match-string m)
-                                  (irregex-match-end m 0)
-                                  (string-length (irregex-match-string m)))
-                       res)))
-            (else (error "unknown match replacement" (car ls)))))
-         (else
-          (lp (cdr ls) (cons (car ls) res)))))))
+;;;; irregex.scm - container for irregex-core.scm
+;
+; Copyright (c) 2010, The Chicken Team
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following
+; conditions are met:
+;
+;   Redistributions of source code must retain the above copyright notice, this list of conditions and the following
+;     disclaimer.
+;   Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
+;     disclaimer in the documentation and/or other materials provided with the distribution.
+;   Neither the name of the author nor the names of its contributors may be used to endorse or promote
+;     products derived from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
+; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+; AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+; POSSIBILITY OF SUCH DAMAGE.
+
+
+
+(declare (unit irregex))
+
+(declare
+  (no-procedure-checks)
+  (fixnum)
+  (export
+   ##sys#glob->regexp
+   irregex
+   irregex-apply-match
+   irregex-dfa
+   irregex-dfa/extract
+   irregex-dfa/search
+   irregex-extract
+   irregex-flags
+   irregex-fold
+   irregex-fold/chunked
+   irregex-lengths
+   irregex-match
+   irregex-match?
+   irregex-match-data?
+   irregex-match-end
+   irregex-match-end-chunk
+   irregex-match-end-index
+   irregex-match-names
+   irregex-match-num-submatches
+   irregex-match-start
+   irregex-match-start-chunk
+   irregex-match-start-index
+   irregex-match-string
+   irregex-match-subchunk
+   irregex-match-substring
+   irregex-match-valid-index?
+   irregex-match/chunked
+   irregex-names
+   irregex-new-matches
+   irregex-nfa
+   irregex-num-submatches
+   irregex-opt
+   irregex-quote
+   irregex-replace
+   irregex-replace/all
+   irregex-reset-matches!
+   irregex-search
+   irregex-search/chunked
+   irregex-search/matches
+   irregex-split
+   irregex-submatches
+   irregex? 
+   make-irregex-chunker
+   maybe-string->sre
+   irregex-search/chunked
+   sre->irregex
+   sre->string
+   string->irregex
+   string->sre
+   ))
+
+(include "common-declarations.scm")
+
+(register-feature! 'irregex)
+
+(define-syntax (build-cache x r c)
+  ;; (build-cache N ARG FAIL) 
+  (let* ((n (cadr x))
+	 (n2 (* n 2))
+	 (arg (caddr x))
+	 (fail (cadddr x))
+	 (%cache (r 'cache))
+	 (%index (r 'index))
+	 (%arg (r 'arg))
+	 (%let (r 'let))
+	 (%let* (r 'let*))
+	 (%if (r 'if))
+	 (%fx+ (r 'fx+))
+	 (%fxmod (r 'fxmod))
+	 (%equal? (r 'equal?))
+	 (%quote (r 'quote))
+	 (%tmp (r 'tmp))
+	 (%begin (r 'begin))
+	 (cache (make-vector (add1 n2) #f)))
+    (##sys#setslot cache n2 0)		; last slot: current index
+    `(,%let* ((,%cache (,%quote ,cache)) ; we mutate a literal vector
+	      (,%arg ,arg))
+	     ,(let fold ((i 0))
+		(if (fx>= i n)
+		    ;; this should be thread-safe: a context-switch can only
+		    ;; happen before this code and in the call to FAIL.
+		    `(,%let ((,%tmp ,fail)
+			     (,%index (##sys#slot ,%cache ,n2)))
+			    (##sys#setslot ,%cache ,%index ,%arg)
+			    (##sys#setslot ,%cache (,%fx+ ,%index 1) ,%tmp)
+			    (##sys#setislot 
+			     ,%cache ,n2
+			     (##core#inline "C_u_fixnum_modulo" (,%fx+ ,%index 2) ,n2))
+			    ,%tmp)
+		    `(,%if (,%equal? (##sys#slot ,%cache ,(* i 2)) ,%arg)
+			   (##sys#slot ,%cache ,(add1 (* i 2)))
+			   ,(fold (add1 i))))))))
+
+(define-compiler-syntax %%string-copy!
+  (syntax-rules ()
+    ((_ to tstart from fstart fend)
+     (let ((x to)
+	   (y tstart)
+	   (z from)
+	   (u fstart)
+	   (v fend))
+       (##core#inline "C_substring_copy" z x u v y)))))
+
+(define-compiler-syntax %substring=?
+  (syntax-rules ()
+    ((_ a b start1 start2 len)
+     (##core#inline "C_substring_compare" a b start1 start2 len))))
+
+(define-compiler-syntax make-irregex 
+  (syntax-rules ()
+    ((_ dfa dfa/search dfa/extract nfa flags submatches lengths names)
+     (##sys#make-structure
+      'regexp dfa dfa/search dfa/extract nfa flags submatches lengths names))))
+
+(define-compiler-syntax make-irregex-match
+  (syntax-rules ()
+    ((_ count names)
+     (##sys#make-structure
+      'regexp-match
+      (make-vector (+ (* 4 (+ 2 count)) 3) #f) ; #1: submatches
+      names				       ; #2: (guess)
+      #f				       ; #3: chunka
+      #f))))				       ; #4: fail
+
+(define-compiler-syntax bit-shl
+  (syntax-rules ()
+    ((_ n i) (fxshl n i))))
+
+(define-compiler-syntax bit-shr
+  (syntax-rules ()
+    ((_ n i) (fxshr n i))))
+
+(define-compiler-syntax bit-not
+  (syntax-rules ()
+    ((_ n) (fxnot n))))
+
+(define-compiler-syntax bit-ior
+  (syntax-rules ()
+    ((_ a b) (fxior a b))))
+
+(define-compiler-syntax bit-and
+  (syntax-rules ()
+    ((_ a b) (fxand a b))))
+
+(define-compiler-syntax match-vector-ref
+  (syntax-rules ()
+    ((_ m i) (##sys#slot (##sys#slot m 1) i))))
+
+(define-compiler-syntax match-vector-set!
+  (syntax-rules ()
+    ((_ m i x) (##sys#setslot (##sys#slot m 1) i x))))
+
+(define-compiler-syntax irregex-match-start-chunk-set!
+  (syntax-rules ()
+    ((_ m n start)
+     (vector-set! (##sys#slot m 1) (* n 4) start))))
+
+(define-compiler-syntax irregex-match-start-index-set!
+  (syntax-rules ()
+    ((_ m n start)
+     (vector-set! (##sys#slot m 1) (+ 1 (* n 4)) start))))
+
+(define-compiler-syntax irregex-match-end-chunk-set!
+  (syntax-rules ()
+    ((_ m n end)
+     (vector-set! (##sys#slot m 1) (+ 2 (* n 4)) end))))
+
+(define-compiler-syntax irregex-match-end-index-set!
+  (syntax-rules ()
+    ((_ m n end)
+     (vector-set! (##sys#slot m 1) (+ 3 (* n 4)) end))))
+
+(define-compiler-syntax %irregex-error
+  (syntax-rules ()
+    ((_ args ...)
+     (error args ...))))
+
+(include "irregex-core.scm")
+(include "irregex-utils.scm")
+
+(define ##sys#glob->regexp
+  (let ((list->string list->string)
+        (string->list string->list))
+    (lambda (s #!optional sre?)
+      (##sys#check-string s 'glob->regexp)
+      (let ((sre
+	     (cons 
+	      ':
+	      (let loop ((cs (string->list s)) (dir #t))
+		(if (null? cs)
+		    '()
+		    (let ((c (car cs))
+			  (rest (cdr cs)) )
+		      (cond ((char=? c #\*) 
+			     (if dir
+				 `((or (: (~ ("./\\"))
+					  (* (~ ("/\\"))))
+				       (* (~ ("./\\"))))
+				   ,@(loop rest #f))
+				 `((* (~ ("/\\"))) ,@(loop rest #f))))
+			    ((char=? c #\?)  (cons 'any (loop rest #f)))
+			    ((char=? c #\[)
+			     (let loop2 ((rest rest) (s '()))
+			       (cond ((not (pair? rest))
+				      (error 'glob->regexp
+					     "unexpected end of character class" s))
+				     ((char=? #\] (car rest))
+				      `((or ,@s) ,@(loop (cdr rest) #f)))
+				     ((and (pair? (cdr rest))
+					   (pair? (cddr rest))
+					   (char=? #\- (cadr rest)) )
+				      (loop2 (cdddr rest)
+					     (cons `(/ ,(car rest) ,(caddr rest)) s)))
+				     ((and (pair? (cdr rest))
+					   (char=? #\- (car rest)))
+				      (loop2 (cddr rest)
+					     (cons `(~ ,(cadr rest)) s)))
+				     (else
+				      (loop2 (cdr rest) (cons (car rest) s))))))
+			    (else (cons c (loop rest (memq c '(#\\ #\/))))))))))))
+	(if sre? sre (irregex sre))))))
diff --git a/library.scm b/library.scm
index c4b99b0d..d601a02f 100644
--- a/library.scm
+++ b/library.scm
@@ -76,7 +76,8 @@
 #define C_a_get_current_seconds(ptr, c, dummy)  C_flonum(ptr, time(NULL))
 #define C_peek_c_string_at(ptr, i)    ((C_char *)(((C_char **)ptr)[ i ]))
 
-static C_word fast_read_line_from_file(C_word str, C_word port, C_word size) {
+static C_word
+fast_read_line_from_file(C_word str, C_word port, C_word size) {
   int n = C_unfix(size);
   int i;
   int c;
@@ -101,7 +102,7 @@ static C_word fast_read_line_from_file(C_word str, C_word port, C_word size) {
 }
 
 static C_word
-fast_read_string_from_file (C_word dest, C_word port, C_word len, C_word pos)
+fast_read_string_from_file(C_word dest, C_word port, C_word len, C_word pos)
 {
   int n = C_unfix (len);
   char * buf = ((char *)C_data_pointer (dest) + C_unfix (pos));
@@ -1739,7 +1740,8 @@ EOF
 
 (define (##sys#check-port x . loc)
   (unless (%port? x)
-    (##sys#signal-hook #:type-error (and (pair? loc) (car loc)) "argument is not a port" x) ) )
+    (##sys#signal-hook
+     #:type-error (and (pair? loc) (car loc)) "argument is not a port" x) ) )
 
 (define (##sys#check-port-mode port mode . loc)
   (unless (eq? mode (##sys#slot port 1))
diff --git a/manual/Supported language b/manual/Supported language
index 3fe014cd..d7be9c82 100644
--- a/manual/Supported language	
+++ b/manual/Supported language	
@@ -19,7 +19,7 @@
 * [[Unit ports]] I/O ports
 * [[Unit files]] File and pathname operations
 * [[Unit extras]] Useful utility definitions
-* [[Unit regex]] Regular expressions
+* [[Unit irregex]] Regular expressions
 * [[Unit srfi-1]] List Library
 * [[Unit srfi-4]] Homogeneous numeric vectors
 * [[Unit srfi-13]] String library
diff --git a/manual/Unit extras b/manual/Unit extras
index f7f28cca..3d0163a8 100644
--- a/manual/Unit extras	
+++ b/manual/Unit extras	
@@ -196,4 +196,4 @@ false. Returns a string with the accumulated characters.
 ---
 Previous: [[Unit files]]
 
-Next: [[Unit regex]]
+Next: [[Unit irregex]]
diff --git a/manual/Unit irregex b/manual/Unit irregex
new file mode 100644
index 00000000..387b8422
--- /dev/null
+++ b/manual/Unit irregex	
@@ -0,0 +1,826 @@
+[[tags: manual]]
+[[toc:]]
+
+== Unit irregex
+
+This library unit provides support for regular expressions, using the
+powerful ''irregex'' regular expression engine by Alex Shinn.  It
+supports both POSIX syntax with various (irregular) PCRE extensions,
+as well as SCSH's SRE syntax, with various aliases for commonly used
+patterns.  DFA matching is used when possible, otherwise a
+closure-compiled NFA approach is used.  Matching may be performed over
+standard Scheme strings, or over arbitrarily chunked streams of
+strings.
+
+On systems that support dynamic loading, the {{irregex}} unit can
+be made available in the Chicken interpreter ({{csi}}) by entering
+
+<enscript highlight=scheme>
+(require-extension irregex)
+</enscript>
+
+[[toc:]]
+
+=== Specification
+
+==== Procedures
+
+===== irregex
+===== string->irregex
+===== sre->irregex
+
+<procedure>(irregex <posix-string-or-sre> [<options> ...])</procedure><br>
+<procedure>(string->irregex <posix-string> [<options> ...])</procedure><br>
+<procedure>(sre->irregex <sre> [<options> ...])</procedure><br>
+
+Compiles a regular expression from either a POSIX-style regular
+expression string (with most PCRE extensions) or an SCSH-style SRE.
+There is no {{(rx ...)}} syntax - just use normal Scheme lists, with
+{{quasiquote}} if you like.
+
+Technically a string by itself could be considered a valid (though
+rather silly) SRE, so if you want to just match a literal string you
+should use something like {{(irregex `(: ,str))}}, or use the explicit
+{{(sre->irregex str)}}.
+
+The options are a list of any of the following symbols:
+
+; {{'i}}, {{'case-insensitive}} : match case-insensitively
+; {{'m}}, {{'multi-line}} : treat string as multiple lines (effects {{^}} and {{$}})
+; {{'s}}, {{'single-line}} : treat string as a single line ({{.}} can match newline)
+; {{'utf8}} : utf8-mode (assumes strings are byte-strings)
+; {{'fast}} : try to optimize the regular expression
+; {{'small}} : try to compile a smaller regular expression
+; {{'backtrack}} : enforce a backtracking implementation
+
+The {{'fast}} and {{'small}} options are heuristic guidelines and will
+not necessarily make the compiled expression faster or smaller.
+
+===== string->sre
+===== maybe-string->sre
+
+<procedure>(string->sre <str>)</procedure><br>
+<procedure>(maybe-string->sre <obj>)</procedure><br>
+
+For backwards compatibility, procedures to convert a POSIX string into
+an SRE.
+
+{{maybe-string->sre}} does the same thing, but only if the argument is
+a string, otherwise it assumes {{<obj>}} is an SRE and returns it
+as-is.  This is useful when you want to provide an API that allows
+either a POSIX string or SRE (like {{irregex}} or {{irregex-search}}
+below) - it ensures the result is an SRE.
+
+===== irregex?
+
+<procedure>(irregex? <obj>)</procedure><br>
+
+Returns {{#t}} iff the object is a regular expression.
+
+===== irregex-search
+
+<procedure>(irregex-search <irx> <str> [<start> <end>])</procedure>
+
+Searches for any instances of the pattern {{<irx>}} (a POSIX string, SRE
+sexp, or pre-compiled regular expression) in {{<str>}}, optionally between
+the given range.  If a match is found, returns a match object,
+otherwise returns {{#f}}.
+
+Match objects can be used to query the original range of the string or
+its submatches using the {{irregex-match-*}} procedures below.
+
+Examples:
+
+<enscript highlight=scheme>
+(irregex-search "foobar" "abcFOOBARdef") => #f
+
+(irregex-search "foobar" "abcFOOBARdef" 'i) => #<match>
+
+(irregex-search '(w/nocase "foobar") "abcFOOBARdef") => #<match>
+</enscript>
+
+Note, the actual match result is represented by a vector in the
+default implementation.  Throughout this manual, we'll just write
+{{#<match>}} to show that a successful match was returned when the
+details are not important.
+
+Matching follows the POSIX leftmost, longest semantics, when
+searching.  That is, of all possible matches in the string,
+{{irregex-search}} will return the match at the first position
+(leftmost).  If multiple matches are possible from that same first
+position, the longest match is returned.
+
+===== irregex-match
+
+<procedure>(irregex-match <irx> <str>)</procedure>
+
+Like {{irregex-search}}, but performs an anchored match against the
+beginning and end of the string, without searching.
+
+Examples:
+
+<enscript highlight=scheme>
+(irregex-match '(w/nocase "foobar") "abcFOOBARdef") => #f
+
+(irregex-match '(w/nocase "foobar") "FOOBAR") => #<match>
+</enscript>
+
+===== irregex-match-data?
+
+<procedure>(irregex-match-data? <obj>)</procedure>
+
+Returns {{#t}} iff the object is a successful match result from
+{{irregex-search}} or {{irregex-match}}.
+
+===== irregex-num-submatches
+===== irregex-match-num-submatches
+
+<procedure>(irregex-num-submatches <irx>)</procedure><br>
+<procedure>(irregex-match-num-submatches <match>)</procedure>
+
+Returns the number of numbered submatches that are defined in the
+irregex or match object.
+
+===== irregex-names
+===== irregex-match-names
+
+<procedure>(irregex-names <irx>)</procedure><br>
+<procedure>(irregex-match-names <match>)</procedure>
+
+Returns an association list of named submatches that are defined in
+the irregex or match object.  The {{car}} of each item in this list is
+the name of a submatch, the {{cdr}} of each item is the numerical
+submatch corresponding to this name.  If a named submatch occurs
+multiple times in the irregex, it will also occur multiple times in
+this list.
+
+===== irregex-match-valid-index?
+
+<procedure>(irregex-match-valid-index? <match> <index-or-name>)</procedure><br>
+
+Returns {{#t}} iff the {{index-or-name}} named submatch or index is
+defined in the {{match}} object.
+
+===== irregex-match-substring
+===== irregex-match-start-index
+===== irregex-match-end-index
+
+<procedure>(irregex-match-substring <match> [<index-or-name>])</procedure><br>
+<procedure>(irregex-match-start-index <match> [<index-or-name>])</procedure><br>
+<procedure>(irregex-match-end-index <match> [<index-or-name>])</procedure>
+
+Fetches the matched substring (or its start or end offset) at the
+given submatch index, or named submatch.  The entire match is index 0,
+the first 1, etc.  The default is index 0.
+
+===== irregex-match-subchunk
+
+<procedure>(irregex-match-subchunk <match> [<index-or-name>])</procedure>
+
+Generates a chunked data-type for the given match item, of the same
+type as the underlying chunk type (see Chunked String Matching below).
+This is only available if the chunk type specifies the get-subchunk
+API, otherwise an error is raised.
+
+===== irregex-replace
+===== irregex-replace/all
+
+<procedure>(irregex-replace <irx> <str> [<replacements> ...])</procedure><br>
+<procedure>(irregex-replace/all <irx> <str> [<replacements> ...])</procedure>
+
+Matches a pattern in a string, and replaces it with a (possibly empty)
+list of substitutions.  Each {{<replacement>}} can be either a string
+literal, a numeric index, a symbol (as a named submatch), or a
+procedure which takes one argument (the match object) and returns a
+string.
+
+Examples:
+
+<enscript highlight=scheme>
+(irregex-replace "[aeiou]" "hello world" "*") => "h*llo world"
+
+(irregex-replace/all "[aeiou]" "hello world" "*") => "h*ll* w*rld"
+</enscript>
+
+===== irregex-split
+===== irregex-extract
+
+<procedure>(irregex-split <irx> <str> [<start> <end>])</procedure><br>
+<procedure>(irregex-extract <irx> <str> [<start> <end>])</procedure>
+
+{{irregex-split}} splits the string {{<str>}} into substrings divided
+by the pattern in {{<irx>}}.  {{irregex-extract}} does the opposite,
+returning a list of each instance of the pattern matched disregarding
+the substrings in between.
+
+===== irregex-fold
+
+<procedure>(irregex-fold <irx> <kons> <knil> <str> [<finish> <start> <end>])</procedure>
+
+This performs a fold operation over every non-overlapping place
+{{<irx>}} occurs in the string {{str}}.
+
+The {{<kons>}} procedure takes the following signature:
+
+<enscript highlight=scheme>
+(<kons> <from-index> <match> <seed>)
+</enscript>
+
+where {{<from-index>}} is the index from where we started searching
+(initially {{<start>}} and thereafter the end index of the last
+match), {{<match>}} is the resulting match-data object, and {{<seed>}}
+is the accumulated fold result starting with {{<knil>}}.
+
+The rationale for providing the {{<from-index>}} (which is not
+provided in the SCSH {{regexp-fold}} utility), is because this
+information is useful (e.g. for extracting the unmatched portion of
+the string before the current match, as needed in
+{{irregex-replace}}), and not otherwise directly accessible.
+
+The optional {{<finish>}} takes two arguments:
+
+<enscript highlight=scheme>
+(<finish> <from-index> <seed>)
+</enscript>
+
+which simiarly allows you to pick up the unmatched tail of the string,
+and defaults to just returning the {{<seed>}}.
+
+{{<start>}} and {{<end>}} are numeric indices letting you specify the
+boundaries of the string on which you want to fold.
+
+To extract all instances of a match out of a string, you can use
+
+<enscript highlight=scheme>
+(map irregex-match-substring
+     (irregex-fold <irx>
+                   (lambda (i m s) (cons m s))
+		   '()
+		   <str>
+		   (lambda (i s) (reverse s))))
+</enscript>
+
+==== Extended SRE Syntax
+
+Irregex provides the first native implementation of SREs (Scheme
+Regular Expressions), and includes many extensions necessary both for
+minimal POSIX compatibility, as well as for modern extensions found in
+libraries such as PCRE.
+
+The following table summarizes the SRE syntax, with detailed
+explanations following.
+
+  ;; basic patterns
+  <string>                          ; literal string
+  (seq <sre> ...)                   ; sequence
+  (: <sre> ...)
+  (or <sre> ...)                    ; alternation
+  
+  ;; optional/multiple patterns
+  (? <sre> ...)                     ; 0 or 1 matches
+  (* <sre> ...)                     ; 0 or more matches
+  (+ <sre> ...)                     ; 1 or more matches
+  (= <n> <sre> ...)                 ; exactly <n> matches
+  (>= <n> <sre> ...)                ; <n> or more matches
+  (** <from> <to> <sre> ...)        ; <n> to <m> matches
+  (?? <sre> ...)                    ; non-greedy (non-greedy) pattern: (0 or 1)
+  (*? <sre> ...)                    ; non-greedy kleene star
+  (**? <from> <to> <sre> ...)       ; non-greedy range
+  
+  ;; submatch patterns
+  (submatch <sre> ...)              ; numbered submatch
+  ($ <sre> ...)
+  (submatch-named <name> <sre> ...) ; named submatch
+  (=> <name> <sre> ...)
+  (backref <n-or-name>)             ; match a previous submatch
+  
+  ;; toggling case-sensitivity
+  (w/case <sre> ...)                ; enclosed <sre>s are case-sensitive
+  (w/nocase <sre> ...)              ; enclosed <sre>s are case-insensitive
+  
+  ;; character sets
+  <char>                            ; singleton char set
+  (<string>)                        ; set of chars
+  (or <cset-sre> ...)               ; set union
+  (~ <cset-sre> ...)                ; set complement (i.e. [^...])
+  (- <cset-sre> ...)                ; set difference
+  (& <cset-sre> ...)                ; set intersection
+  (/ <range-spec> ...)              ; pairs of chars as ranges
+  
+  ;; named character sets
+  any
+  nonl
+  ascii
+  lower-case     lower
+  upper-case     upper
+  alphabetic     alpha
+  numeric        num
+  alphanumeric   alphanum  alnum
+  punctuation    punct
+  graphic        graph
+  whitespace     white     space
+  printing       print
+  control        cntrl
+  hex-digit      xdigit
+  
+  ;; assertions and conditionals
+  bos eos                           ; beginning/end of string
+  bol eol                           ; beginning/end of line
+  bow eow                           ; beginning/end of word
+  nwb                               ; non-word-boundary
+  (look-ahead <sre> ...)            ; zero-width look-ahead assertion
+  (look-behind <sre> ...)           ; zero-width look-behind assertion
+  (neg-look-ahead <sre> ...)        ; zero-width negative look-ahead assertion
+  (neg-look-behind <sre> ...)       ; zero-width negative look-behind assertion
+  (atomic <sre> ...)                ; for (?>...) independent patterns
+  (if <test> <pass> [<fail>])       ; conditional patterns
+  commit                            ; don't backtrack beyond this (i.e. cut)
+  
+  ;; backwards compatibility
+  (posix-string <string>)           ; embed a POSIX string literal
+
+===== Basic SRE Patterns
+
+The simplest SRE is a literal string, which matches that string
+exactly.
+
+<enscript highlight=scheme>
+(irregex-search "needle" "hayneedlehay") => #<match>
+</enscipt>
+
+By default the match is case-sensitive, though you can control this
+either with the compiler flags or local overrides:
+
+<enscript highlight=scheme>
+(irregex-search "needle" "haynEEdlehay") => #f
+
+(irregex-search (irregex "needle" 'i) "haynEEdlehay") => #<match>
+
+(irregex-search '(w/nocase "needle") "haynEEdlehay") => #<match>
+</enscript>
+
+You can use {{w/case}} to switch back to case-sensitivity inside a
+{{w/nocase}} or when the SRE was compiled with {{'i}}:
+
+<enscript highlight=scheme>
+(irregex-search '(w/nocase "SMALL" (w/case "BIG")) "smallBIGsmall") => #<match>
+
+(irregex-search '(w/nocase "small" (w/case "big")) "smallBIGsmall") => #f
+</enscript>
+
+Of course, literal strings by themselves aren't very interesting
+regular expressions, so we want to be able to compose them.  The most
+basic way to do this is with the {{seq}} operator (or its abbreviation
+{{:}}), which matches one or more patterns consecutively:
+
+<enscript highlight=scheme>
+(irregex-search '(: "one" space "two" space "three") "one two three") => #<match>
+</enscript>
+
+As you may have noticed above, the {{w/case}} and {{w/nocase}}
+operators allowed multiple SREs in a sequence - other operators that
+take any number of arguments (e.g. the repetition operators below)
+allow such implicit sequences.
+
+To match any one of a set of patterns use the {{or}} alternation
+operator:
+
+<enscript highlight=scheme>
+(irregex-search '(or "eeney" "meeney" "miney") "meeney") => #<match>
+
+(irregex-search '(or "eeney" "meeney" "miney") "moe") => #f
+</enscript>
+
+===== SRE Repetition Patterns
+
+There are also several ways to control the number of times a pattern
+is matched.  The simplest of these is {{?}} which just optionally
+matches the pattern:
+
+<enscript highlight=scheme>
+(irregex-search '(: "match" (? "es") "!") "matches!") => #<match>
+
+(irregex-search '(: "match" (? "es") "!") "match!") => #<match>
+
+(irregex-search '(: "match" (? "es") "!") "matche!") => #<match>
+</enscript>
+
+To optionally match any number of times, use {{*}}, the Kleene star:
+
+<enscript highlight=scheme>
+(irregex-search '(: "<" (* (~ #\>)) ">") "<html>") => #<match>
+
+(irregex-search '(: "<" (* (~ #\>)) ">") "<>") => #<match>
+
+(irregex-search '(: "<" (* (~ #\>)) ">") "<html") => #f
+</enscript>
+
+Often you want to match any number of times, but at least one time is
+required, and for that you use {{+}}:
+
+<enscript highlight=scheme>
+(irregex-search '(: "<" (+ (~ #\>)) ">") "<html>") => #<match>
+
+(irregex-search '(: "<" (+ (~ #\>)) ">") "<a>") => #<match>
+
+(irregex-search '(: "<" (+ (~ #\>)) ">") "<>") => #f
+</enscript>
+
+More generally, to match at least a given number of times, use {{>=}}:
+
+<enscript highlight=scheme>
+(irregex-search '(: "<" (>= 3 (~ #\>)) ">") "<table>") => #<match>
+
+(irregex-search '(: "<" (>= 3 (~ #\>)) ">") "<pre>") => #<match>
+
+(irregex-search '(: "<" (>= 3 (~ #\>)) ">") "<tr>") => #f
+</enscript>
+
+To match a specific number of times exactly, use {{=}}:
+
+<enscript highlight=scheme>
+(irregex-search '(: "<" (= 4 (~ #\>)) ">") "<html>") => #<match>
+
+(irregex-search '(: "<" (= 4 (~ #\>)) ">") "<table>") => #f
+</enscript>
+
+And finally, the most general form is {{**}} which specifies a range
+of times to match.  All of the earlier forms are special cases of this.
+
+<enscript highlight=scheme>
+(irregex-search '(: (= 3 (** 1 3 numeric) ".") (** 1 3 numeric)) "192.168.1.10") => #<match>
+
+(irregex-search '(: (= 3 (** 1 3 numeric) ".") (** 1 3 numeric)) "192.0168.1.10") => #f
+</enscript>
+
+There are also so-called "non-greedy" variants of these repetition
+operators, by convention suffixed with an additional {{?}}.  Since the
+normal repetition patterns can match any of the allotted repetition
+range, these operators will match a string if and only if the normal
+versions matched.  However, when the endpoints of which submatch
+matched where are taken into account (specifically, all matches when
+using irregex-search since the endpoints of the match itself matter),
+the use of a non-greedy repetition can change the result.
+
+So, whereas {{?}} can be thought to mean "match or don't match,"
+{{??}} means "don't match or match."  {{*}} typically consumes as much
+as possible, but {{*?}} tries first to match zero times, and only
+consumes one at a time if that fails.  If you have a greedy operator
+followed by a non-greedy operator in the same pattern, they can
+produce surprisins results as they compete to make the match longer or
+shorter.  If this seems confusing, that's because it is.  Non-greedy
+repetitions are defined only in terms of the specific backtracking
+algorithm used to implement them, which for compatibility purposes
+always means the Perl algorithm.  Thus, when using these patterns you
+force IrRegex to use a backtracking engine, and can't rely on
+efficient execution.
+
+===== SRE Character Sets
+
+Perhaps more common than matching specific strings is matching any of
+a set of characters.  You can use the {{or}} alternation pattern on a
+list of single-character strings to simulate a character set, but this
+is too clumsy for everyday use so SRE syntax allows a number of
+shortcuts.
+
+A single character matches that character literally, a trivial
+character class.  More conveniently, a list holding a single element
+which is a string refers to the character set composed of every
+character in the string.
+
+<enscript highlight=scheme>
+(irregex-match '(* #\-) "---") => #<match>
+
+(irregex-match '(* #\-) "-_-") => #f
+
+(irregex-match '(* ("aeiou")) "oui") => #<match>
+
+(irregex-match '(* ("aeiou")) "ouais") => #f
+</enscript>
+
+Ranges are introduced with the \q{/} operator.  Any strings or
+characters in the \q{/} are flattened and then taken in pairs to
+represent the start and end points, inclusive, of character ranges.
+
+<enscript highlight=scheme>
+(irregex-match '(* (/ "AZ09")) "R2D2") => #<match>
+
+(irregex-match '(* (/ "AZ09")) "C-3PO") => #f
+</enscript>
+
+In addition, a number of set algebra operations are provided.  \q{or},
+of course, has the same meaning, but when all the options are
+character sets it can be thought of as the set union operator.  This
+is further extended by the \q{&} set intersection, \q{-} set
+difference, and \q{~} set complement operators.
+
+<enscript highlight=scheme>
+(irregex-match '(* (& (/ "az") (~ ("aeiou")))) "xyzzy") => #<match>
+
+(irregex-match '(* (& (/ "az") (~ ("aeiou")))) "vowels") => #f
+
+(irregex-match '(* (- (/ "az") ("aeiou"))) "xyzzy") => #<match>
+
+(irregex-match '(* (- (/ "az") ("aeiou"))) "vowels") => #f
+</enscript>
+
+===== SRE Assertion Patterns
+
+There are a number of times it can be useful to assert something about
+the area around a pattern without explicitly making it part of the
+pattern.  The most common cases are specifically anchoring some
+pattern to the beginning or end of a word or line or even the whole
+string.  For example, to match on the end of a word:
+
+<enscript highlight=scheme>
+(irregex-match '(: "foo" eow) "foo") => #<match>
+
+(irregex-match '(: "foo" eow) "foo!") => #<match>
+
+(irregex-match '(: "foo" eow) "foof") => #f
+</enscript>
+
+The {{bow}}, {{bol}}, {{eol}}, {{bos}} and {{eos}} work similarly.
+{{nwb}} asserts that you are not in a word-boundary - if replaced for
+{{eow}} in the above examples it would reverse all the results.
+
+There is no {{wb}}, since you tend to know from context whether it
+would be the beginning or end of a word, but if you need it you can
+always use {{(or bow eow)}}.
+
+Somewhat more generally, Perl introduced positive and negative
+look-ahead and look-behind patterns.  Perl look-behind patterns are
+limited to a fixed length, however the IrRegex versions have no such
+limit.
+
+<enscript highlight=scheme>
+(irregex-match '(: "regular" (look-ahead " expression"))
+               "regular expression")
+ => #<match>
+</enscript>
+
+The most general case, of course, would be an \q{and} pattern to
+complement the \q{or} pattern - all the patterns must match or the
+whole pattern fails.  This may be provided in a future release,
+although it (and look-ahead and look-behind assertions) are unlikely
+to be compiled efficiently.
+
+===== SRE Utility Patterns
+
+The following utility regular expressions are also provided for common
+patterns that people are eternally reinventing.  They are not
+necessarily the official patterns matching the RFC definitions of the
+given data, because of the way that such patterns tend to be used.
+There are three general usages for regexps:
+
+; searching : search for a pattern matching a desired object in a larger text
+
+; validation : determine whether an entire string matches a pattern
+
+; extraction : given a string already known to be valid, extract certain fields from it as submatches
+
+In some cases, but not always, these will overlap.  When they are
+different, {{irregex-search}} will naturally always want the searching
+version, so IrRegex provides that version.
+
+As an example where these might be different, consider a URL.  If you
+want to match all the URLs in some arbitrary text, you probably want
+to exclude a period or comma at the tail end of a URL, since it's more
+likely being used as punctuation rather than part of the URL, despite
+the fact that it would be valid URL syntax.
+
+Another problem with the RFC definitions is the standard itself may
+have become irrelevant.  For example, the pattern IrRegex provides for
+email addresses doesn't match quoted local parts (e.g.
+{{"first last"@domain.com}}) because these are increasingly rare, and
+unsupported by enough software that it's better to discourage their use.
+Conversely, technically consecutive periods
+(e.g. {{first..last@domain.com}}) are not allowed in email addresses, but
+most email software does allow this, and in fact such addresses are
+quite common in Japan.
+
+The current patterns provided are:
+
+  newline                        ; general newline pattern (crlf, cr, lf)
+  integer                        ; an integer
+  real                           ; a real number (including scientific)
+  string                         ; a "quoted" string
+  symbol                         ; an R5RS Scheme symbol
+  ipv4-address                   ; a numeric decimal ipv4 address
+  ipv6-address                   ; a numeric hexadecimal ipv6 address
+  domain                         ; a domain name
+  email                          ; an email address
+  http-url                       ; a URL beginning with https?://
+
+Because of these issues the exact definitions of these patterns are
+subject to be changed, but will be documented clearly when they are
+finalized.  More common patterns are also planned, but as what you
+want increases in complexity it's probably better to use a real
+parser.
+
+==== Supported PCRE Syntax
+
+Since the PCRE syntax is so overwhelming complex, it's easier to just
+list what we *don't* support for now.  Refer to the
+[[http://pcre.org/pcre.txt|PCRE documentation]] for details.  You
+should be using the SRE syntax anyway!
+
+Unicode character classes ({{\P}}) are not supported, but will be
+in an upcoming release.  {{\C}} named characters are not supported.
+
+Callbacks, subroutine patterns and recursive patterns are not
+supported.  ({{*FOO}}) patterns are not supported and may never be.
+
+{{\G}} and {{\K}} are not supported.
+
+Octal character escapes are not supported because they are ambiguous
+with back-references - just use hex character escapes.
+
+Other than that everything should work, including named submatches,
+zero-width assertions, conditional patterns, etc.
+
+In addition, {{\<}} and {{\>}} act as beginning-of-word and end-of-word
+marks, respectively, as in Emacs regular expressions.
+
+Also, two escapes are provided to embed SRE patterns inside PCRE
+strings, {{"\'<sre>"}} and {{"(*'<sre>)"}}.  For example, to match a
+comma-delimited list of integers you could use
+
+<enscript highlight=scheme>
+"\\'integer(,\\'integer)*"
+</enscript>
+
+and to match a URL in angle brackets you could use
+
+<enscript highlight=scheme>
+"<('*http-url)>"
+</enscript>
+
+Note in the second example the enclosing {{"('*...)"}} syntax is needed
+because the Scheme reader would consider the closing {{">"}} as part of
+the SRE symbol.
+
+The following chart gives a quick reference from PCRE form to the SRE
+equivalent:
+
+  ;; basic syntax
+  "^"                     ;; bos (or eos inside (?m: ...))
+  "$"                     ;; eos (or eos inside (?m: ...))
+  "."                     ;; nonl
+  "a?"                    ;; (? a)
+  "a*"                    ;; (* a)
+  "a+"                    ;; (+ a)
+  "a??"                   ;; (?? a)
+  "a*?"                   ;; (*? a)
+  "a+?"                   ;; (+? a)
+  "a{n,m}"                ;; (** n m a)
+
+  ;; grouping
+  "(...)"                 ;; (submatch ...)
+  "(?:...)"               ;; (: ...)
+  "(?i:...)"              ;; (w/nocase ...)
+  "(?-i:...)"             ;; (w/case ...)
+  "(?<name>...)"          ;; (=> <name>...)
+
+  ;; character classes
+  "[aeiou]"               ;; ("aeiou")
+  "[^aeiou]"              ;; (~ "aeiou")
+  "[a-z]"                 ;; (/ "az") or (/ "a" "z")
+  "[[:alpha:]]"           ;; alpha
+
+  ;; assertions
+  "(?=...)"               ;; (look-ahead ...)
+  "(?!...)"               ;; (neg-look-ahead ...)
+  "(?<=...)"              ;; (look-behind ...)
+  "(?<!...)"              ;; (neg-look-behind ...)
+  "(?(test)pass|fail)"    ;; (if test pass fail)
+  "(*COMMIT)"             ;; commit
+
+==== Chunked String Matching
+
+It's often desirable to perform regular expression matching over
+sequences of characters not represented as a single string.  The most
+obvious example is a text-buffer data structure, but you may also want
+to match over lists or trees of strings (i.e. ropes), over only
+certain ranges within a string, over an input port, etc.  With
+existing regular expression libraries, the only way to accomplish this
+is by converting the abstract sequence into a freshly allocated
+string.  This can be expensive, or even impossible if the object is a
+text-buffer opened onto a 500MB file.
+
+IrRegex provides a chunked string API specifically for this purpose.
+You define a chunking API with {{make-irregex-chunker}}:
+
+===== make-irregex-chunker
+
+<procedure>(make-irregex-chunker <get-next> <get-string> [<get-start> <get-end> <get-substring> <get-subchunk>])</procedure>
+
+where 
+
+{{(<get-next> chunk) => }} returns the next chunk, or {{#f}} if there are no more chunks
+
+{{(<get-string> chunk) => }} a string source for the chunk
+
+{{(<get-start> chunk) => }} the start index of the result of {{<get-string>}} (defaults to always 0)
+
+{{(<get-end> chunk) => }} the end (exclusive) of the string (defaults to {{string-length}} of the source string)
+
+{{(<get-substring> cnk1 i cnk2 j) => }} a substring for the range between the chunk {{cnk1}} starting at index {{i}} and ending at {{cnk2}} at index {{j}}
+
+{{(<get-subchunk> cnk1 i cnk2 j) => }} as above but returns a new chunked data type instead of a string (optional)
+
+There are two important constraints on the {{<get-next>}} procedure.
+It must return an {{eq?}} identical object when called multiple times
+on the same chunk, and it must not return a chunk with an empty string
+(start == end).  This second constraint is for performance reasons -
+we push the work of possibly filtering empty chunks to the chunker
+since there are many chunk types for which empty strings aren't
+possible, and this work is thus not needed.  Note that the initial
+chunk passed to match on is allowed to be empty.
+
+{{<get-substring>}} is provided for possible performance improvements
+- without it a default is used.  {{<get-subchunk>}} is optional -
+without it you may not use {{irregex-match-subchunk}} described above.
+
+You can then match chunks of these types with the following
+procedures:
+
+===== irregex-search/chunked
+===== irregex-match/chunked
+
+<procedure>(irregex-search/chunked <irx> <chunker> <chunk> [<start>])</procedure><br>
+<procedure>(irregex-match/chunked <irx> <chunker> <chunk> [<start>])</procedure>
+
+These return normal match-data objects.
+
+Example:
+
+To match against a simple, flat list of strings use:
+
+<enscript highlight=scheme>
+  (define (rope->string rope1 start rope2 end)
+    (if (eq? rope1 rope2)
+        (substring (car rope1) start end)
+        (let loop ((rope (cdr rope1))
+                   (res (list (substring (car rope1) start))))
+           (if (eq? rope rope2)
+               (string-concatenate-reverse      ; from SRFI-13
+                (cons (substring (car rope) 0 end) res))
+               (loop (cdr rope) (cons (car rope) res))))))
+
+  (define rope-chunker
+    (make-irregex-chunker (lambda (x) (and (pair? (cdr x)) (cdr x)))
+                          car
+                          (lambda (x) 0)
+                          (lambda (x) (string-length (car x)))
+                          rope->string))
+
+  (irregex-search/chunked <pat> rope-chunker <list-of-strings>)
+</enscript>
+
+Here we are just using the default start, end and substring behaviors,
+so the above chunker could simply be defined as:
+
+<enscript highlight=scheme>
+  (define rope-chunker
+    (make-irregex-chunker (lambda (x) (and (pair? (cdr x)) (cdr x))) car))
+</enscript>
+
+===== irregex-fold/chunked
+
+<procedure>(irregex-fold/chunked <irx> <kons> <knil> <chunker> <chunk> [<finish> [<start-index>]])</procedure>
+
+Chunked version of {{irregex-fold}}.
+
+==== Utilities
+
+The following procedures are also available.
+
+===== irregex-quote
+
+<procedure>(irregex-quote <str>)</procedure>
+
+Returns a new string with any special regular expression characters
+escaped, to match the original string literally in POSIX regular
+expressions.
+
+===== irregex-opt
+
+<procedure>(irregex-opt <list-of-strings>)</procedure>
+
+Returns an optimized SRE matching any of the literal strings
+in the list, like Emacs' \q{regexp-opt}.  Note this optimization
+doesn't help when irregex is able to build a DFA.
+
+===== sre->string
+
+<procedure>(sre->string <sre>)</procedure>
+
+Convert an SRE to a POSIX-style regular expression string, if
+possible.
+
+
+---
+Previous: [[Unit extras]]
+
+Next: [[Unit srfi-1]]
diff --git a/manual/Unit posix b/manual/Unit posix
index f236e3d6..5627f61b 100644
--- a/manual/Unit posix	
+++ b/manual/Unit posix	
@@ -1104,7 +1104,7 @@ Recursively traverses the contents of {{DIRECTORY}} (which should be a
 string) and invokes the procedure {{action}} for all files in which
 the procedure {{test}} is true.  {{test}} may be a procedure of one
 argument or a regular-expression string that will be matched with a
-full pathname using {{string-match}}.  {{action}} should be a
+full pathname using {{irregex-match}}.  {{action}} should be a
 procedure of two arguments: the currently encountered file and the
 result of the previous invocation of {{action}}, or, if this is the
 first invocation, the value of {{seed}}. {{test}} defaults to
diff --git a/manual/Unit regex b/manual/Unit regex
deleted file mode 100644
index 2d0c249e..00000000
--- a/manual/Unit regex	
+++ /dev/null
@@ -1,461 +0,0 @@
-[[tags: manual]]
-[[toc:]]
-
-== Unit regex
-
-This library unit provides support for regular expressions. The regular 
-expression package used is {{irregex}}
-written by Alex Shinn. Irregex supports most Perl-extensions and is
-written completely in Scheme.
-
-This library unit exposes two APIs: the standard Chicken API described below, and the
-original irregex API.  You may use either API or both:
-
- (require-library regex)   ; required for either API, or both
- (import regex)            ; import the Chicken regex API
- (import irregex)          ; import the original irregex API
-
-Regular expressions may be either POSIX-style strings (with most PCRE
-extensions) or an SCSH-style SRE. There is no {{(rx ...)}} syntax -
-just use normal Scheme lists, with quasiquote if you like.
-
-=== grep
-
-<procedure>(grep REGEX LIST [ACCESSOR])</procedure>
-
-Returns all items of {{LIST}} that match the regular expression
-{{REGEX}}.  This procedure could be defined as follows:
-
-<enscript highlight=scheme>
-(define (grep regex lst)
-  (filter (lambda (x) (string-search regex x)) lst) )
-</enscript>
-
-{{ACCESSOR}} is an optional accessor-procedure applied to each
-element before doing the match. It should take a single argument
-and return a string that will then be used in the regular expression
-matching. {{ACCESSOR}} defaults to the identity function.
-
-
-=== glob->regexp
-
-<procedure>(glob->regexp PATTERN [SRE?])</procedure>
-
-Converts the file-pattern {{PATTERN}} into a regular expression.
-
-<enscript highlight=scheme>
-(glob->regexp "foo.*")
-=> "foo\..*"
-</enscript>
-
-{{PATTERN}} should follow "glob" syntax. Allowed wildcards are
-
- *
- [C...]
- [C1-C2]
- [-C...]
- ?
-
-{{glob->regexp}} returns a regular expression object if the optional
-argument {{SRE?}} is false or not given, otherwise the SRE of the
-computed regular expression is returned.
-
-
-=== regexp
-
-<procedure>(regexp STRING [IGNORECASE [IGNORESPACE [UTF8]]])</procedure>
-
-Returns a precompiled regular expression object for {{string}}.
-The optional arguments {{IGNORECASE}}, {{IGNORESPACE}} and {{UTF8}}
-specify whether the regular expression should be matched with case- or whitespace-differences
-ignored, or whether the string should be treated as containing UTF-8 encoded
-characters, respectively.
-
-Note that code that uses regular expressions heavily should always
-use them in precompiled form, which is likely to be much faster than
-passing strings to any of the regular-expression routines described
-below.
-
-
-=== regexp?
-
-<procedure>(regexp? X)</procedure>
-
-Returns {{#t}} if {{X}} is a precompiled regular expression,
-or {{#f}} otherwise.
-
-
-=== string-match
-=== string-match-positions
-
-<procedure>(string-match REGEXP STRING)</procedure><br>
-<procedure>(string-match-positions REGEXP STRING)</procedure>
-
-Matches the regular expression in {{REGEXP}} (a string or a precompiled
-regular expression) with
-{{STRING}} and returns either {{#f}} if the match failed,
-or a list of matching groups, where the first element is the complete
-match.  For each matching group the
-result-list contains either: {{#f}} for a non-matching but optional
-group; a list of start- and end-position of the match in {{STRING}}
-(in the case of {{string-match-positions}}); or the matching
-substring (in the case of {{string-match}}). Note that the exact string
-is matched. For searching a pattern inside a string, see below.
-Note also that {{string-match}} is implemented by calling
-{{string-search}} with the regular expression wrapped in {{^ ... $}}.
-If invoked with a precompiled regular expression argument (by using
-{{regexp}}), {{string-match}} is identical to {{string-search}}.
-
-
-=== string-search
-=== string-search-positions
-
-<procedure>(string-search REGEXP STRING [START [RANGE]])</procedure><br>
-<procedure>(string-search-positions REGEXP STRING [START [RANGE]])</procedure>
-
-Searches for the first match of the regular expression in
-{{REGEXP}} with {{STRING}}. The search can be limited to
-{{RANGE}} characters.
-
-
-=== string-split-fields
-
-<procedure>(string-split-fields REGEXP STRING [MODE [START]])</procedure>
-
-Splits {{STRING}} into a list of fields according to {{MODE}},
-where {{MODE}} can be the keyword {{#:infix}} ({{REGEXP}}
-matches field separator), the keyword {{#:suffix}} ({{REGEXP}}
-matches field terminator) or {{#t}} ({{REGEXP}} matches field),
-which is the default.
-
-<enscript highlight=scheme>
-(define s "this is a string 1, 2, 3,")
-
-(string-split-fields "[^ ]+" s)
-
-  => ("this" "is" "a" "string" "1," "2," "3,")
-
-(string-split-fields " " s #:infix)
-
-  => ("this" "is" "a" "string" "1," "2," "3,")
-
-(string-split-fields "," s #:suffix)
- 
-  => ("this is a string 1" " 2" " 3")
-</enscript>
-
-
-=== string-substitute
-
-<procedure>(string-substitute REGEXP SUBST STRING [MODE])</procedure>
-
-Searches substrings in {{STRING}} that match {{REGEXP}}
-and substitutes them with the string {{SUBST}}. The substitution
-can contain references to subexpressions in 
-{{REGEXP}} with the {{\NUM}} notation, where {{NUM}}
-refers to the NUMth parenthesized expression. The optional argument
-{{MODE}} defaults to 1 and specifies the number of the match to
-be substituted. Any non-numeric index specifies that all matches are to
-be substituted.
-
-<enscript highlight=scheme>
-(string-substitute "([0-9]+) (eggs|chicks)" "\\2 (\\1)" "99 eggs or 99 chicks" 2)
-=> "99 eggs or chicks (99)"
-</enscript>
-
-Note that a regular expression that matches an empty string will
-signal an error.
-
-
-=== string-substitute*
-
-<procedure>(string-substitute* STRING SMAP [MODE])</procedure>
-
-Substitutes elements of {{STRING}} with {{string-substitute}} according to {{SMAP}}.
-{{SMAP}} should be an association-list where each element of the list
-is a pair of the form {{(MATCH . REPLACEMENT)}}. Every occurrence of
-the regular expression {{MATCH}} in {{STRING}} will be replaced by the string
-{{REPLACEMENT}}
-
-<enscript highlight=scheme>
-(string-substitute* "<h1>Hello, world!</h1>" '(("<[/A-Za-z0-9]+>" . "")))
-
-=>  "Hello, world!"
-</enscript>
-
-
-=== regexp-escape
-
-<procedure>(regexp-escape STRING)</procedure>
-
-Escapes all special characters in {{STRING}} with {{\}}, so that the string can be embedded
-into a regular expression.
-
-<enscript highlight=scheme>
-(regexp-escape "^[0-9]+:.*$")
-=>  "\\^\\[0-9\\]\\+:.\n.\\*\\$"
-</enscript>
-
-=== Extended SRE Syntax
-
-The following table summarizes the SRE syntax, with detailed explanations following.
-
-  ;; basic patterns
-  <string>                          ; literal string
-  (seq <sre> ...)                   ; sequence
-  (: <sre> ...)
-  (or <sre> ...)                    ; alternation
-  
-  ;; optional/multiple patterns
-  (? <sre> ...)                     ; 0 or 1 matches
-  (* <sre> ...)                     ; 0 or more matches
-  (+ <sre> ...)                     ; 1 or more matches
-  (= <n> <sre> ...)                 ; exactly <n> matches
-  (>= <n> <sre> ...)                ; <n> or more matches
-  (** <from> <to> <sre> ...)        ; <n> to <m> matches
-  (?? <sre> ...)                    ; non-greedy (non-greedy) pattern: (0 or 1)
-  (*? <sre> ...)                    ; non-greedy kleene star
-  (**? <from> <to> <sre> ...)       ; non-greedy range
-  
-  ;; submatch patterns
-  (submatch <sre> ...)              ; numbered submatch
-  (submatch-named <name> <sre> ...) ; named submatch
-  (=> <name> <sre> ...)
-  (backref <n-or-name>)             ; match a previous submatch
-  
-  ;; toggling case-sensitivity
-  (w/case <sre> ...)                ; enclosed <sre>s are case-sensitive
-  (w/nocase <sre> ...)              ; enclosed <sre>s are case-insensitive
-  
-  ;; character sets
-  <char>                            ; singleton char set
-  (<string>)                        ; set of chars
-  (or <cset-sre> ...)               ; set union
-  (~ <cset-sre> ...)                ; set complement (i.e. [^...])
-  (- <cset-sre> ...)                ; set difference
-  (& <cset-sre> ...)                ; set intersection
-  (/ <range-spec> ...)              ; pairs of chars as ranges
-  
-  ;; named character sets
-  any
-  nonl
-  ascii
-  lower-case     lower
-  upper-case     upper
-  alphabetic     alpha
-  numeric        num
-  alphanumeric   alphanum  alnum
-  punctuation    punct
-  graphic        graph
-  whitespace     white     space
-  printing       print
-  control        cntrl
-  hex-digit      xdigit
-  
-  ;; assertions and conditionals
-  bos eos                           ; beginning/end of string
-  bol eol                           ; beginning/end of line
-  bow eow                           ; beginning/end of word
-  nwb                               ; non-word-boundary
-  (look-ahead <sre> ...)            ; zero-width look-ahead assertion
-  (look-behind <sre> ...)           ; zero-width look-behind assertion
-  (neg-look-ahead <sre> ...)        ; zero-width negative look-ahead assertion
-  (neg-look-behind <sre> ...)       ; zero-width negative look-behind assertion
-  (atomic <sre> ...)                ; for (?>...) independent patterns
-  (if <test> <pass> [<fail>])       ; conditional patterns
-  commit                            ; don't backtrack beyond this (i.e. cut)
-  
-  ;; backwards compatibility
-  (posix-string <string>)           ; embed a POSIX string literal
-
-====  Basic SRE Patterns
-
-The simplest SRE is a literal string, which matches that string exactly.
-
-  (string-search "needle" "hayneedlehay") => <match>
-
-By default the match is case-sensitive, though you can control this either with the compiler flags or local overrides:
-
-  (string-search "needle" "haynEEdlehay") => #f
-  
-  (string-search (irregex "needle" 'i) "haynEEdlehay") => <match>
-  
-  (string-search '(w/nocase "needle") "haynEEdlehay") => <match>
-
-You can use {{w/case}} to switch back to case-sensitivity inside a {{w/nocase}}:
-
-  (string-search '(w/nocase "SMALL" (w/case "BIG")) "smallBIGsmall") => <match>
-  
-  (string-search '(w/nocase "small" (w/case "big")) "smallBIGsmall") => #f
-
-Of course, literal strings by themselves aren't very interesting
-regular expressions, so we want to be able to compose them. The most
-basic way to do this is with the {{seq}} operator (or its abbreviation {{:}}),
-which matches one or more patterns consecutively:
-
-  (string-search '(: "one" space "two" space "three") "one two three") => <match>
-
-As you may have noticed above, the {{w/case}} and {{w/nocase}} operators
-allowed multiple SREs in a sequence - other operators that take any
-number of arguments (e.g. the repetition operators below) allow such
-implicit sequences.
-
-To match any one of a set of patterns use the or alternation operator:
-
-  (string-search '(or "eeney" "meeney" "miney") "meeney") => <match>
-
-  (string-search '(or "eeney" "meeney" "miney") "moe") => #f
-
-====  SRE Repetition Patterns
-
-There are also several ways to control the number of times a pattern
-is matched. The simplest of these is {{?}} which just optionally matches
-the pattern:
-
-  (string-search '(: "match" (? "es") "!") "matches!") => <match>
-  
-  (string-search '(: "match" (? "es") "!") "match!") => <match>
-  
-  (string-search '(: "match" (? "es") "!") "matche!") => #f
-
-To optionally match any number of times, use {{*}}, the Kleene star:
-
-  (string-search '(: "<" (* (~ #\>)) ">") "<html>") => <match>
-  
-  (string-search '(: "<" (* (~ #\>)) ">") "<>") => <match>
-  
-  (string-search '(: "<" (* (~ #\>)) ">") "<html") => #f
-
-Often you want to match any number of times, but at least one time is required, and for that you use {{+}}:
-
-  (string-search '(: "<" (+ (~ #\>)) ">") "<html>") => <match>
-  
-  (string-search '(: "<" (+ (~ #\>)) ">") "<a>") => <match>
-  
-  (string-search '(: "<" (+ (~ #\>)) ">") "<>") => #f
-
-More generally, to match at least a given number of times, use {{>=}}:
-
-  (string-search '(: "<" (>= 3 (~ #\>)) ">") "<table>") => <match>
-
-  (string-search '(: "<" (>= 3 (~ #\>)) ">") "<pre>") => <match>
-
-  (string-search '(: "<" (>= 3 (~ #\>)) ">") "<tr>") => #f
-
-To match a specific number of times exactly, use {=}:
-
-  (string-search '(: "<" (= 4 (~ #\>)) ">") "<html>") => <match>
-  
-  (string-search '(: "<" (= 4 (~ #\>)) ">") "<table>") => #f
-
-And finally, the most general form is {{**}} which specifies a range
-of times to match. All of the earlier forms are special cases of this.
-
-  (string-search '(: (= 3 (** 1 3 numeric) ".") (** 1 3 numeric)) "192.168.1.10") => <match>
-
-  (string-search '(: (= 3 (** 1 3 numeric) ".") (** 1 3 numeric)) "192.0168.1.10") => #f
-
-There are also so-called "non-greedy" variants of these repetition
-operators, by convention suffixed with an additional {{?}}. Since the
-normal repetition patterns can match any of the allotted repetition
-range, these operators will match a string if and only if the normal
-versions matched. However, when the endpoints of which submatch
-matched where are taken into account (specifically, all matches when
-using string-search since the endpoints of the match itself matter),
-the use of a non-greedy repetition can change the result.
-
-So, whereas {{?}} can be thought to mean "match or don't match," {{??}} means
-"don't match or match." {{*}} typically consumes as much as possible, but
-{{*?}} tries first to match zero times, and only consumes one at a time if
-that fails. If you have a greedy operator followed by a non-greedy
-operator in the same pattern, they can produce surprisins results as
-they compete to make the match longer or shorter. If this seems
-confusing, that's because it is. Non-greedy repetitions are defined
-only in terms of the specific backtracking algorithm used to implement
-them, which for compatibility purposes always means the Perl
-algorithm. Thus, when using these patterns you force IrRegex to use a
-backtracking engine, and can't rely on efficient execution.
-
-====  SRE Character Sets
-
-Perhaps more common than matching specific strings is matching any of
-a set of characters. You can use the or alternation pattern on a list
-of single-character strings to simulate a character set, but this is
-too clumsy for everyday use so SRE syntax allows a number of
-shortcuts.
-
-A single character matches that character literally, a trivial
-character class. More conveniently, a list holding a single element
-which is a string refers to the character set composed of every
-character in the string.
-
-  (string-match '(* #\-) "---") => <match>
-  
-  (string-match '(* #\-) "-_-") => #f
-  
-  (string-match '(* ("aeiou")) "oui") => <match>
-  
-  (string-match '(* ("aeiou")) "ouais") => #f
-
-Ranges are introduced with the {{/}} operator. Any strings or characters
-in the {{/}} are flattened and then taken in pairs to represent the start
-and end points, inclusive, of character ranges.
-
-  (string-match '(* (/ "AZ09")) "R2D2") => <match>
-  
-  (string-match '(* (/ "AZ09")) "C-3PO") => #f
-
-In addition, a number of set algebra operations are provided. or, of
-course, has the same meaning, but when all the options are character
-sets it can be thought of as the set union operator. This is further
-extended by the {{&}} set intersection, {{-}} set difference, and {{~}} set
-complement operators.
-
-  (string-match '(* (& (/ "az") (~ ("aeiou")))) "xyzzy") => <match>
-  
-  (string-match '(* (& (/ "az") (~ ("aeiou")))) "vowels") => #f
-
-  (string-match '(* (- (/ "az") ("aeiou"))) "xyzzy") => <match>
-  
-  (string-match '(* (- (/ "az") ("aeiou"))) "vowels") => #f
-
-====  SRE Assertion Patterns
-
-There are a number of times it can be useful to assert something about
-the area around a pattern without explicitly making it part of the
-pattern. The most common cases are specifically anchoring some pattern
-to the beginning or end of a word or line or even the whole
-string. For example, to match on the end of a word:
-
-  (string-match '(: "foo" eow) "foo") => <match>
-  
-  (string-match '(: "foo" eow) "foo!") => <match>
-  
-  (string-match '(: "foo" eow) "foof") => #f
-
-The {{bow}}, {{bol}}, {{eol}}, {{bos}} and {{eos}} work similarly. {{nwb}} asserts that you
-are not in a word-boundary - if replaced for {{eow}} in the above examples
-it would reverse all the results.
-
-There is no {{wb}}, since you tend to know from context whether it
-would be the beginning or end of a word, but if you need it you can
-always use (or bow eow).
-
-Somewhat more generally, Perl introduced positive and negative
-look-ahead and look-behind patterns. Perl look-behind patterns are
-limited to a fixed length, however the IrRegex versions have no such
-limit.
-
-  (string-match '(: "regular" (look-ahead " expression")) "regular expression") => <match>
-
-The most general case, of course, would be an and pattern to
-complement the or pattern - all the patterns must match or the whole
-pattern fails. This may be provided in a future release, although it
-(and look-ahead and look-behind assertions) are unlikely to be
-compiled efficiently.
-
-
----
-Previous: [[Unit extras]]
-
-Next: [[Unit srfi-1]]
diff --git a/manual/Unit srfi-1 b/manual/Unit srfi-1
index 72ac271f..59a47160 100644
--- a/manual/Unit srfi-1	
+++ b/manual/Unit srfi-1	
@@ -1,4 +1,5 @@
 [[tags: manual]]
+[[toc:]]
 
 == Unit srfi-1
 
@@ -1515,6 +1516,6 @@ arguments.
 
 ----
 
-Previous: [[Unit regex]]
+Previous: [[Unit irregex]]
 
 Next: [[Unit srfi-4]]
diff --git a/manual/Unit utils b/manual/Unit utils
index e1af0895..8c174b1d 100644
--- a/manual/Unit utils	
+++ b/manual/Unit utils	
@@ -88,11 +88,12 @@ The initial default options are {{-scrutinize -O2 -d2}}.
 <procedure>(scan-input-lines REGEXP [PORT])</procedure>
 
 Reads lines from {{PORT}} (defaults to the result of {{(current-input-port)}})
-using {{read-line}} and returns the result of {{(string-search REGEXP LINE)}},
+using {{read-line}} and returns the result of {{(irregex-search REGEXP LINE)}},
 if the match succeeds. If no match could be found, {{#f}} is returned.
 
 {{REGEXP}} may also be a procedure of one argument which is called for each
-input line and should return a non-false value on success.
+input line and should return a non-false value on success, which will then
+be the result of the call to {{scan-input-lines}}.
 
 
 === Asking the user for confirmation
diff --git a/manual/faq b/manual/faq
index 7abf0381..24f054af 100644
--- a/manual/faq
+++ b/manual/faq
@@ -533,7 +533,7 @@ Compile the program that uses the module:
 The regular expression engine has recently be replaced by [[/users/alex shinn|alex shinn]]'s excellent
 {{irregex}} library, which is fully implemented in Scheme. Precompiling regular
 expressions to internal form is somewhat slower than with the old PCRE-based
-regex engine. It is advisable to use {{regexp}} to precompile regular expressions
+regex engine. It is advisable to use {{irregex}} to precompile regular expressions
 outside of time-critical loops and use them where performance matters.
 
 
diff --git a/posix-common.scm b/posix-common.scm
index 55f9f488..e77b05fb 100644
--- a/posix-common.scm
+++ b/posix-common.scm
@@ -233,64 +233,54 @@ EOF
 ;;; Filename globbing:
 
 (define glob
-  (let ((regexp regexp)
-        (string-match string-match)
-        (glob->regexp glob->regexp)
-	(directory directory)
-        (make-pathname make-pathname)
-        (decompose-pathname decompose-pathname) )
-    (lambda paths
-      (let conc-loop ((paths paths))
-        (if (null? paths)
-            '()
-            (let ((path (car paths)))
-              (let-values (((dir fil ext) (decompose-pathname path)))
-                (let* ((patt (glob->regexp (make-pathname #f (or fil "*") ext)))
-                       (rx (regexp patt)))
-                  (let loop ((fns (directory (or dir ".") #t)))
-                    (cond ((null? fns) (conc-loop (cdr paths)))
-                          ((string-match rx (car fns))
-                           => (lambda (m) (cons (make-pathname dir (car m)) (loop (cdr fns)))) )
-                          (else (loop (cdr fns))) ) ) ) ) ) ) ) ) ) )
+  (lambda paths
+    (let conc-loop ((paths paths))
+      (if (null? paths)
+	  '()
+	  (let ((path (car paths)))
+	    (let-values (((dir fil ext) (decompose-pathname path)))
+	      (let ((rx (##sys#glob->regexp (make-pathname #f (or fil "*") ext))))
+		(let loop ((fns (directory (or dir ".") #t)))
+		  (cond ((null? fns) (conc-loop (cdr paths)))
+			((irregex-match rx (car fns))
+			 => (lambda (m)
+			      (cons 
+			       (make-pathname dir (irregex-match-substring m))
+			       (loop (cdr fns)))) )
+			(else (loop (cdr fns))) ) ) ) ) ) ) ) ) )
 
 
 ;;; Find matching files:
 
 (define ##sys#find-files
-  (let ((glob glob)
-	(string-match string-match)
-	(make-pathname make-pathname)
-	(pathname-file pathname-file)
-	(symbolic-link? symbolic-link?)
-	(directory? directory?) )
-    (lambda (dir pred action id limit follow dot loc)
-	(##sys#check-string dir loc)
-	(let* ((depth 0)
-	       (lproc
-		(cond ((not limit) (lambda _ #t))
-		      ((fixnum? limit) (lambda _ (fx< depth limit)))
-		      (else limit) ) )
-	       (pproc
-		(if (or (string? pred) (regexp? pred))
-		    (let ((pred (regexp pred))) ; force compilation
-		      (lambda (x) (string-match pred x)))
-		    pred) ) )
-	  (let loop ((fs (glob (make-pathname dir (if dot "?*" "*"))))
-		     (r id) )
-	    (if (null? fs)
-		r
-		(let ((f (##sys#slot fs 0))
-		      (rest (##sys#slot fs 1)) )
-		  (cond ((directory? f)
-			 (cond ((member (pathname-file f) '("." "..")) (loop rest r))
-			       ((lproc f)
-				(loop rest
-				      (fluid-let ((depth (fx+ depth 1)))
-					(loop (glob (make-pathname f "*"))
-					      (if (pproc f) (action f r) r)) ) ) )
-			       (else (loop rest (if (pproc f) (action f r) r))) ) )
-			((pproc f) (loop rest (action f r)))
-			(else (loop rest r)) ) ) ) ) ) ) ) )
+  (lambda (dir pred action id limit follow dot loc)
+    (##sys#check-string dir loc)
+    (let* ((depth 0)
+	   (lproc
+	    (cond ((not limit) (lambda _ #t))
+		  ((fixnum? limit) (lambda _ (fx< depth limit)))
+		  (else limit) ) )
+	   (pproc
+	    (if (or (string? pred) (irregex? pred))
+		(let ((pred (irregex pred))) ; force compilation
+		  (lambda (x) (irregex-match pred x)))
+		pred) ) )
+      (let loop ((fs (glob (make-pathname dir (if dot "?*" "*"))))
+		 (r id) )
+	(if (null? fs)
+	    r
+	    (let ((f (##sys#slot fs 0))
+		  (rest (##sys#slot fs 1)) )
+	      (cond ((directory? f)
+		     (cond ((member (pathname-file f) '("." "..")) (loop rest r))
+			   ((lproc f)
+			    (loop rest
+				  (fluid-let ((depth (fx+ depth 1)))
+				    (loop (glob (make-pathname f "*"))
+					  (if (pproc f) (action f r) r)) ) ) )
+			   (else (loop rest (if (pproc f) (action f r) r))) ) )
+		    ((pproc f) (loop rest (action f r)))
+		    (else (loop rest r)) ) ) ) ) ) ) )
 
 (define (find-files dir . args)
   (cond ((or (null? args) (not (keyword? (car args))))
diff --git a/posixunix.scm b/posixunix.scm
index e17f6b3e..ed83fe1b 100644
--- a/posixunix.scm
+++ b/posixunix.scm
@@ -27,7 +27,7 @@
 
 (declare
   (unit posix)
-  (uses scheduler regex extras utils files ports)
+  (uses scheduler irregex extras utils files ports)
   (disable-interrupts)
   (hide group-member _get-groups _ensure-groups posix-error ##sys#terminal-check)
   (not inline ##sys#interrupt-hook ##sys#user-interrupt-hook))
diff --git a/posixwin.scm b/posixwin.scm
index 9dee8ede..97a0a232 100644
--- a/posixwin.scm
+++ b/posixwin.scm
@@ -63,7 +63,7 @@
 
 (declare
   (unit posix)
-  (uses scheduler regex extras utils files ports)
+  (uses scheduler irregex extras utils files ports)
   (disable-interrupts)
   (hide $quote-args-list $exec-setup $exec-teardown)
   (not inline ##sys#interrupt-hook ##sys#user-interrupt-hook)
diff --git a/regex.import.scm b/regex.import.scm
deleted file mode 100644
index 70310a22..00000000
--- a/regex.import.scm
+++ /dev/null
@@ -1,41 +0,0 @@
-;;;; regex.import.scm - import library for "regex" module
-;
-; Copyright (c) 2008-2010, The Chicken Team
-; All rights reserved.
-;
-; Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following
-; conditions are met:
-;
-;   Redistributions of source code must retain the above copyright notice, this list of conditions and the following
-;     disclaimer. 
-;   Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
-;     disclaimer in the documentation and/or other materials provided with the distribution. 
-;   Neither the name of the author nor the names of its contributors may be used to endorse or promote
-;     products derived from this software without specific prior written permission. 
-;
-; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
-; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
-; AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
-; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-; POSSIBILITY OF SUCH DAMAGE.
-
-
-(##sys#register-primitive-module
- 'regex
- '(glob->regexp
-   glob?
-   grep
-   regexp
-   regexp-escape
-   regexp?
-   string-match
-   string-match-positions
-   string-search
-   string-search-positions
-   string-split-fields
-   string-substitute
-   string-substitute*))
diff --git a/regex.scm b/regex.scm
deleted file mode 100644
index 526e65ad..00000000
--- a/regex.scm
+++ /dev/null
@@ -1,360 +0,0 @@
-;;;; regex.scm
-;
-; Copyright (c) 2008-2010, The Chicken Team
-; Copyright (c) 2000-2007, Felix L. Winkelmann
-; All rights reserved.
-;
-; Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following
-; conditions are met:
-;
-;   Redistributions of source code must retain the above copyright notice, this list of conditions and the following
-;     disclaimer.
-;   Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
-;     disclaimer in the documentation and/or other materials provided with the distribution.
-;   Neither the name of the author nor the names of its contributors may be used to endorse or promote
-;     products derived from this software without specific prior written permission.
-;
-; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
-; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
-; AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
-; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-; POSSIBILITY OF SUCH DAMAGE.
-
-
-(declare (unit regex))
-
-(declare
-  (disable-interrupts)
-  (fixnum)
-  (export
-    regexp? regexp
-    string-match string-match-positions string-search string-search-positions
-    string-split-fields string-substitute string-substitute*
-    glob->regexp
-    grep
-    regexp-escape 
-
-    irregex string->irregex sre->irregex string->sre
-    irregex? irregex-match-data?
-    irregex-new-matches irregex-reset-matches!
-    irregex-match-start irregex-match-end irregex-match-substring
-    irregex-match-num-submatches
-    irregex-search irregex-search/matches irregex-match irregex-match-string
-    irregex-fold irregex-replace irregex-replace/all irregex-apply-match
-    irregex-dfa irregex-dfa/search irregex-dfa/extract
-    irregex-nfa irregex-flags irregex-submatches irregex-lengths irregex-names
-    ))
-
-(include "common-declarations.scm")
-
-(register-feature! 'regex 'irregex)
-
-(include "irregex.scm")
-
-
-;;; Record `regexp'
-
-(define-record regexp x)
-
-(define-syntax (build-cache x r c)
-  ;; (build-cache N ARG FAIL) 
-  (let* ((n (cadr x))
-	 (n2 (* n 2))
-	 (arg (caddr x))
-	 (fail (cadddr x))
-	 (%cache (r 'cache))
-	 (%index (r 'index))
-	 (%arg (r 'arg))
-	 (%let (r 'let))
-	 (%let* (r 'let*))
-	 (%if (r 'if))
-	 (%fx+ (r 'fx+))
-	 (%fxmod (r 'fxmod))
-	 (%equal? (r 'equal?))
-	 (%quote (r 'quote))
-	 (%tmp (r 'tmp))
-	 (%begin (r 'begin))
-	 (cache (make-vector (add1 n2) #f)))
-    (vector-set! cache n2 0)		; last slot: current index
-    `(,%let* ((,%cache (,%quote ,cache))
-	      (,%arg ,arg))
-	     ,(let fold ((i 0))
-		(if (>= i n)
-		    ;; this should be thread-safe: a context-switch can only
-		    ;; happen before this code and in the call to FAIL.
-		    `(,%let ((,%tmp ,fail)
-			     (,%index (##sys#slot ,%cache ,n2)))
-			    (##sys#setslot ,%cache ,%index ,%arg)
-			    (##sys#setslot ,%cache (,%fx+ ,%index 1) ,%tmp)
-			    (##sys#setislot 
-			     ,%cache ,n2
-			     (##core#inline "C_u_fixnum_modulo" (,%fx+ ,%index 2) ,n2))
-			    ,%tmp)
-		    `(,%if (,%equal? (##sys#slot ,%cache ,(* i 2)) ,%arg)
-			   (##sys#slot ,%cache ,(add1 (* i 2)))
-			   ,(fold (add1 i))))))))
-
-(define (regexp pat #!optional caseless extended utf8)
-  (if (regexp? pat)
-      pat
-      (make-regexp
-       (apply
-	irregex 
-	pat 
-	(let ((opts '()))
-	  (when caseless (set! opts (cons 'i opts)))
-	  (when extended (set! opts (cons 'x opts)))
-	  (when utf8 (set! opts (cons 'utf8 opts)))
-	  opts))) ) )
-
-(define (unregexp x)
-  (cond ((regexp? x) (regexp-x x))
-	((irregex? x) x)
-	(else
-	 (build-cache
-	  5 x
-	  (irregex x)))))
-
-
-;;; Basic `regexp' operations
-
-(define (string-match rx str)
-  (let ((rx (unregexp rx)))
-    (and-let* ((m (irregex-match rx str)))
-      (let loop ((i (irregex-match-num-submatches m))
-                 (res '()))
-        (if (fx<= i 0)
-            (cons str res)
-            (loop (fx- i 1) (cons (irregex-match-substring m i) res)))))))
-
-(define (string-match-positions rx str)
-  (let ((rx (unregexp rx)))
-    (and-let* ((m (irregex-match rx str)))
-      (let loop ((i (irregex-match-num-submatches m))
-                 (res '()))
-        (if (fx<= i 0)
-            (cons (list 0 (string-length str)) res)
-            (loop (fx- i 1) (cons (list (irregex-match-start-index m i)
-                                        (irregex-match-end-index m i))
-                                  res)))))))
-
-(define (string-search rx str #!optional (start 0) (range (string-length str)))
-  (let ((rx (unregexp rx)))
-    (let ((n (string-length str)))
-      (and-let* ((m (irregex-search rx str start (min n (fx+ start range)))))
-	(let loop ((i (irregex-match-num-submatches m))
-		   (res '()))
-	  (if (fx< i 0)
-	      res
-	      (loop (fx- i 1) (cons (irregex-match-substring m i) res))))))))
-
-(define (string-search-positions rx str #!optional (start 0) (range (string-length str)))
-  (let ((rx (unregexp rx)))
-    (let ((n (string-length str)))
-      (and-let* ((m (irregex-search rx str start (min n (fx+ start range)))))
-	(let loop ((i (irregex-match-num-submatches m))
-		   (res '()))
-	  (if (fx< i 0)
-	      res
-	      (loop (fx- i 1) (cons (list (irregex-match-start-index m i)
-					  (irregex-match-end-index m i))
-				    res))))))))
-
-
-;;; Split string into fields:
-
-(define string-split-fields
-  (let ([reverse reverse]
-        [substring substring]
-        [string-search-positions string-search-positions] )
-    (lambda (rx str . mode-and-start)
-      (##sys#check-string str 'string-split-fields)
-      (let* ([argc (length mode-and-start)]
-             [len (##sys#size str)]
-             [mode (if (fx> argc 0) (car mode-and-start) #t)]
-             [start (if (fx> argc 1) (cadr mode-and-start) 0)]
-             [fini (case mode
-                     [(#:suffix)
-                      (lambda (ms start)
-                        (if (fx< start len)
-                            (##sys#error 'string-split-fields
-                                         "record does not end with suffix" str rx)
-                            (reverse ms) ) ) ]
-                     [(#:infix)
-                      (lambda (ms start)
-                        (if (fx>= start len)
-                            (reverse (cons "" ms))
-                            (reverse (cons (substring str start len) ms)) ) ) ]
-                     [else (lambda (ms start) (reverse ms)) ] ) ]
-             [fetch (case mode
-                      [(#:infix #:suffix) (lambda (start from to) (substring str start from))]
-                      [else (lambda (start from to) (substring str from to))] ) ] )
-        (let loop ([ms '()] [start start])
-          (let ([m (string-search-positions rx str start)])
-            (if m
-                (let* ([mp (car m)]
-                       [from (car mp)]
-                       [to (cadr mp)] )
-                  (if (fx= from to)
-                      (if (fx= to len)
-                          (fini ms start)
-                          (loop (cons (fetch start (fx+ from 1) (fx+ to 2)) ms) (fx+ to 1)) )
-                      (loop (cons (fetch start from to) ms) to) ) )
-                (fini ms start) ) ) ) ) ) ) )
-
-
-;;; Substitute matching strings:
-
-(define string-substitute
-  (let ([substring substring]
-        [reverse reverse]
-        [make-string make-string]
-        [string-search-positions string-search-positions] )
-    (lambda (rx subst string . flag)
-      (##sys#check-string subst 'string-substitute)
-      (##sys#check-string string 'string-substitute)
-      (let* ([which (if (pair? flag) (car flag) 1)]
-             [substlen (##sys#size subst)]
-	     (strlen (##sys#size string))
-             [substlen-1 (fx- substlen 1)]
-             [result '()]
-             [total 0] )
-        (define (push x)
-          (set! result (cons x result))
-          (set! total (fx+ total (##sys#size x))) )
-        (define (substitute matches)
-          (let loop ([start 0] [index 0])
-            (if (fx>= index substlen-1)
-                (push (if (fx= start 0) subst (substring subst start substlen)))
-                (let ([c (##core#inline "C_subchar" subst index)]
-                      [index+1 (fx+ index 1)] )
-                  (if (char=? c #\\)
-                      (let ([c2 (##core#inline "C_subchar" subst index+1)])
-                        (if (and (not (char=? #\\ c2)) (char-numeric? c2))
-                            (let ([mi (list-ref matches (fx- (char->integer c2) 48))])
-                              (push (substring subst start index))
-                              (push (substring string (car mi) (cadr mi)))
-                              (loop (fx+ index 2) index+1) )
-                            (loop start (fx+ index+1 1)) ) )
-                      (loop start index+1) ) ) ) ) )
-        (let loop ([index 0] [count 1])
-          (let ((matches (and (fx< index strlen) 
-			      (string-search-positions rx string index))))
-            (cond [matches
-                   (let* ([range (car matches)]
-                          [upto (cadr range)] )
-                     (cond ((fx= 0 (fx- (cadr range) (car range)))
-                            (##sys#error
-                             'string-substitute "empty substitution match"
-                             rx) )
-                           ((or (not (fixnum? which)) (fx= count which))
-                            (push (substring string index (car range)))
-                            (substitute matches)
-                            (loop upto #f) )
-                           (else
-                            (push (substring string index upto))
-                            (loop upto (fx+ count 1)) ) ) ) ]
-                  [else
-                   (push (substring string index (##sys#size string)))
-                   (##sys#fragments->string total (reverse result)) ] ) ) ) ) ) ) )
-
-(define string-substitute*
-  (let ([string-substitute string-substitute])
-    (lambda (str smap . mode)
-      (##sys#check-string str 'string-substitute*)
-      (##sys#check-list smap 'string-substitute*)
-      (let ((mode (and (pair? mode) (car mode))))
-        (let loop ((str str) (smap smap))
-          (if (null? smap)
-              str
-              (let ((sm (car smap)))
-                (loop (string-substitute (car sm) (cdr sm) str mode)
-                      (cdr smap) ) ) ) ) ) ) ) )
-
-
-;;; Glob support:
-
-(define glob->regexp
-  (let ((list->string list->string)
-        (string->list string->list)
-	(regexp regexp))
-    (lambda (s #!optional sre?)
-      (##sys#check-string s 'glob->regexp)
-      (let ((sre
-	     (cons 
-	      ':
-	      (let loop ((cs (string->list s)) (dir #t))
-		(if (null? cs)
-		    '()
-		    (let ((c (car cs))
-			  (rest (cdr cs)) )
-		      (cond ((char=? c #\*) 
-			     (if dir
-				 `((or (: (~ ("./\\"))
-					  (* (~ ("/\\"))))
-				       (* (~ ("./\\"))))
-				   ,@(loop rest #f))
-				 `((* (~ ("/\\"))) ,@(loop rest #f))))
-			    ((char=? c #\?)  (cons 'any (loop rest #f)))
-			    ((char=? c #\[)
-			     (let loop2 ((rest rest) (s '()))
-			       (cond ((not (pair? rest))
-				      (error 'glob->regexp "unexpected end of character class" s))
-				     ((char=? #\] (car rest))
-				      `((or ,@s) ,@(loop (cdr rest) #f)))
-				     ((and (pair? (cdr rest))
-					   (pair? (cddr rest))
-					   (char=? #\- (cadr rest)) )
-				      (loop2 (cdddr rest) (cons `(/ ,(car rest) ,(caddr rest)) s)))
-				     ((and (pair? (cdr rest))
-					   (char=? #\- (car rest)))
-				      (loop2 (cddr rest)
-					     (cons `(~ ,(cadr rest)) s)))
-				     (else
-				      (loop2 (cdr rest) (cons (car rest) s))))))
-			    (else (cons c (loop rest (memq c '(#\\ #\/))))))))))))
-	(if sre? sre (regexp sre))))))
-
-
-;;; Grep-like function on list:
-
-(define grep
-  (let ((string-search string-search)
-	(regexp regexp))
-    (lambda (rx lst #!optional (acc (lambda (x) x)))
-      (##sys#check-list lst 'grep)
-      (##sys#check-closure acc 'grep)
-      (let ((rx (regexp rx)))
-	(let loop ((lst lst))
-	  (if (null? lst)
-	      '()
-	      (let ((x (##sys#slot lst 0))
-		    (r (##sys#slot lst 1)) )
-		(if (string-search rx (acc x))
-		    (cons x (loop r))
-		    (loop r) ) ) ) ) ) ) ) )
-
-
-;;; Escape regular expression (suggested by Peter Bex):
-
-(define regexp-escape
-  (let ([open-output-string open-output-string]
-        [get-output-string get-output-string] )
-    (lambda (str)
-      (##sys#check-string str 'regexp-escape)
-      (let ([out (open-output-string)]
-            [len (##sys#size str)] )
-	(let loop ([i 0])
-	  (cond [(fx>= i len) (get-output-string out)]
-                [(memq (##core#inline "C_subchar" str i)
-                       '(#\. #\\ #\? #\* #\+ #\^ #\$ #\( #\) #\[ #\] #\| #\{ #\}))
-                 (##sys#write-char-0 #\\ out)
-                 (##sys#write-char-0 (##core#inline "C_subchar" str i) out)
-                 (loop (fx+ i 1)) ]
-                [else
-                 (##sys#write-char-0 (##core#inline "C_subchar" str i) out)
-                 (loop (fx+ i 1)) ] ) ) ) ) ) )
diff --git a/rules.make b/rules.make
index 2e571510..a2888e16 100644
--- a/rules.make
+++ b/rules.make
@@ -28,16 +28,11 @@ VPATH=$(SRCDIR)
 
 # object files
 
-LIBCHICKEN_OBJECTS_1 = \
-       library eval data-structures ports files extras lolevel utils tcp srfi-1 srfi-4 srfi-13 \
-       srfi-14 srfi-18 srfi-69 $(POSIXFILE) regex scheduler \
-       profiler stub expand chicken-syntax chicken-ffi-syntax runtime
+LIBCHICKEN_OBJECTS_1 = library eval data-structures ports files extras lolevel utils tcp srfi-1 srfi-4 srfi-13 srfi-14 srfi-18 srfi-69 $(POSIXFILE) irregex scheduler profiler stub expand chicken-syntax chicken-ffi-syntax runtime
 LIBCHICKEN_SHARED_OBJECTS = $(LIBCHICKEN_OBJECTS_1:=$(O))
 LIBCHICKEN_STATIC_OBJECTS = $(LIBCHICKEN_OBJECTS_1:=-static$(O))
 
-COMPILER_OBJECTS_1 = \
-       chicken batch-driver compiler optimizer compiler-syntax scrutinizer unboxing support \
-       c-platform c-backend
+COMPILER_OBJECTS_1 = chicken batch-driver compiler optimizer compiler-syntax scrutinizer unboxing support c-platform c-backend
 COMPILER_OBJECTS        = $(COMPILER_OBJECTS_1:=$(O))
 COMPILER_STATIC_OBJECTS = $(COMPILER_OBJECTS_1:=-static$(O))
 
@@ -100,7 +95,7 @@ profiler$(O): profiler.c chicken.h $(CHICKEN_CONFIG_H)
 	$(C_COMPILER) $(C_COMPILER_OPTIONS) $(INCLUDES) \
 	  $(C_COMPILER_COMPILE_OPTION) $(C_COMPILER_OPTIMIZATION_OPTIONS) $(C_COMPILER_SHARED_OPTIONS) \
 	  $(C_COMPILER_BUILD_RUNTIME_OPTIONS) $< $(C_COMPILER_OUTPUT)
-regex$(O): regex.c chicken.h $(CHICKEN_CONFIG_H) 
+irregex$(O): irregex.c chicken.h $(CHICKEN_CONFIG_H) 
 	$(C_COMPILER) $(C_COMPILER_OPTIONS) \
 	  $(C_COMPILER_COMPILE_OPTION) $(C_COMPILER_OPTIMIZATION_OPTIONS) $(C_COMPILER_SHARED_OPTIONS) \
 	  $(C_COMPILER_BUILD_RUNTIME_OPTIONS) $< $(C_COMPILER_OUTPUT)
@@ -215,7 +210,7 @@ profiler-static$(O): profiler.c chicken.h $(CHICKEN_CONFIG_H)
 	  $(C_COMPILER_COMPILE_OPTION) $(C_COMPILER_OPTIMIZATION_OPTIONS) \
 	  $(C_COMPILER_STATIC_OPTIONS) \
 	  $(C_COMPILER_BUILD_RUNTIME_OPTIONS) $< $(C_COMPILER_OUTPUT)
-regex-static$(O): regex.c chicken.h $(CHICKEN_CONFIG_H) 
+irregex-static$(O): irregex.c chicken.h $(CHICKEN_CONFIG_H) 
 	$(C_COMPILER) $(C_COMPILER_OPTIONS) $(INCLUDES) \
 	  $(C_COMPILER_COMPILE_OPTION) $(C_COMPILER_OPTIMIZATION_OPTIONS) \
 	  $(C_COMPILER_STATIC_OPTIONS) \
@@ -326,10 +321,6 @@ extras.import$(O): extras.import.c chicken.h $(CHICKEN_CONFIG_H)
 	$(HOST_C_COMPILER) $(HOST_C_COMPILER_OPTIONS) $(HOST_C_COMPILER_PTABLES_OPTIONS) $(INCLUDES) -DC_SHARED \
 	  $(HOST_C_COMPILER_COMPILE_OPTION) $(HOST_C_COMPILER_OPTIMIZATION_OPTIONS) $(HOST_C_COMPILER_SHARED_OPTIONS) \
 	  $(HOST_C_COMPILER_BUILD_RUNTIME_OPTIONS) $< $(HOST_C_COMPILER_OUTPUT)
-regex.import$(O): regex.import.c chicken.h $(CHICKEN_CONFIG_H)
-	$(HOST_C_COMPILER) $(HOST_C_COMPILER_OPTIONS) $(HOST_C_COMPILER_PTABLES_OPTIONS) $(INCLUDES) -DC_SHARED \
-	  $(HOST_C_COMPILER_COMPILE_OPTION) $(HOST_C_COMPILER_OPTIMIZATION_OPTIONS) $(HOST_C_COMPILER_SHARED_OPTIONS) \
-	  $(HOST_C_COMPILER_BUILD_RUNTIME_OPTIONS) $< $(HOST_C_COMPILER_OUTPUT)
 irregex.import$(O): irregex.import.c chicken.h $(CHICKEN_CONFIG_H)
 	$(HOST_C_COMPILER) $(HOST_C_COMPILER_OPTIONS) $(HOST_C_COMPILER_PTABLES_OPTIONS) $(INCLUDES) -DC_SHARED \
 	  $(HOST_C_COMPILER_COMPILE_OPTION) $(HOST_C_COMPILER_OPTIMIZATION_OPTIONS) $(HOST_C_COMPILER_SHARED_OPTIONS) \
@@ -679,7 +670,6 @@ ifdef STATICBUILD
 	$(INSTALL_PROGRAM) $(INSTALL_PROGRAM_FILE_OPTIONS) srfi-13.import.scm "$(DESTDIR)$(IEGGDIR)"
 	$(INSTALL_PROGRAM) $(INSTALL_PROGRAM_FILE_OPTIONS) srfi-69.import.scm "$(DESTDIR)$(IEGGDIR)"
 	$(INSTALL_PROGRAM) $(INSTALL_PROGRAM_FILE_OPTIONS) extras.import.scm "$(DESTDIR)$(IEGGDIR)"
-	$(INSTALL_PROGRAM) $(INSTALL_PROGRAM_FILE_OPTIONS) regex.import.scm "$(DESTDIR)$(IEGGDIR)"
 	$(INSTALL_PROGRAM) $(INSTALL_PROGRAM_FILE_OPTIONS) srfi-14.import.scm "$(DESTDIR)$(IEGGDIR)"
 	$(INSTALL_PROGRAM) $(INSTALL_PROGRAM_FILE_OPTIONS) tcp.import.scm "$(DESTDIR)$(IEGGDIR)"
 	$(INSTALL_PROGRAM) $(INSTALL_PROGRAM_FILE_OPTIONS) foreign.import.scm "$(DESTDIR)$(IEGGDIR)"
@@ -702,7 +692,6 @@ else
 	$(INSTALL_PROGRAM) $(INSTALL_PROGRAM_EXECUTABLE_OPTIONS) srfi-13.import.so "$(DESTDIR)$(IEGGDIR)"
 	$(INSTALL_PROGRAM) $(INSTALL_PROGRAM_EXECUTABLE_OPTIONS) srfi-69.import.so "$(DESTDIR)$(IEGGDIR)"
 	$(INSTALL_PROGRAM) $(INSTALL_PROGRAM_EXECUTABLE_OPTIONS) extras.import.so "$(DESTDIR)$(IEGGDIR)"
-	$(INSTALL_PROGRAM) $(INSTALL_PROGRAM_EXECUTABLE_OPTIONS) regex.import.so "$(DESTDIR)$(IEGGDIR)"
 	$(INSTALL_PROGRAM) $(INSTALL_PROGRAM_EXECUTABLE_OPTIONS) srfi-14.import.so "$(DESTDIR)$(IEGGDIR)"
 	$(INSTALL_PROGRAM) $(INSTALL_PROGRAM_EXECUTABLE_OPTIONS) tcp.import.so "$(DESTDIR)$(IEGGDIR)"
 	$(INSTALL_PROGRAM) $(INSTALL_PROGRAM_EXECUTABLE_OPTIONS) foreign.import.so "$(DESTDIR)$(IEGGDIR)"
@@ -740,7 +729,6 @@ ifneq ($(POSTINSTALL_PROGRAM),true)
 	$(POSTINSTALL_PROGRAM) $(POSTINSTALL_PROGRAM_FLAGS) "$(DESTDIR)$(IEGGDIR)$(SEP)srfi-13.import.so"
 	$(POSTINSTALL_PROGRAM) $(POSTINSTALL_PROGRAM_FLAGS) "$(DESTDIR)$(IEGGDIR)$(SEP)srfi-69.import.so"
 	$(POSTINSTALL_PROGRAM) $(POSTINSTALL_PROGRAM_FLAGS) "$(DESTDIR)$(IEGGDIR)$(SEP)extras.import.so"
-	$(POSTINSTALL_PROGRAM) $(POSTINSTALL_PROGRAM_FLAGS) "$(DESTDIR)$(IEGGDIR)$(SEP)regex.import.so"
 	$(POSTINSTALL_PROGRAM) $(POSTINSTALL_PROGRAM_FLAGS) "$(DESTDIR)$(IEGGDIR)$(SEP)irregex.import.so"
 	$(POSTINSTALL_PROGRAM) $(POSTINSTALL_PROGRAM_FLAGS) "$(DESTDIR)$(IEGGDIR)$(SEP)srfi-14.import.so"
 	$(POSTINSTALL_PROGRAM) $(POSTINSTALL_PROGRAM_FLAGS) "$(DESTDIR)$(IEGGDIR)$(SEP)tcp.import.so"
@@ -864,8 +852,8 @@ posixunix.c: $(SRCDIR)posixunix.scm $(SRCDIR)posix-common.scm $(SRCDIR)common-de
 	$(CHICKEN) $< $(CHICKEN_LIBRARY_OPTIONS) -output-file $@ 
 posixwin.c: $(SRCDIR)posixwin.scm $(SRCDIR)posix-common.scm $(SRCDIR)common-declarations.scm
 	$(CHICKEN) $< $(CHICKEN_LIBRARY_OPTIONS) -output-file $@ 
-regex.c: $(SRCDIR)regex.scm $(SRCDIR)irregex.scm $(SRCDIR)common-declarations.scm
-	$(CHICKEN) $< $(CHICKEN_LIBRARY_OPTIONS) -output-file $@ 
+irregex.c: $(SRCDIR)irregex.scm $(SRCDIR)irregex-core.scm $(SRCDIR)irregex-utils.scm $(SRCDIR)common-declarations.scm
+	$(CHICKEN) $< $(CHICKEN_LIBRARY_OPTIONS) -output-file $@
 scheduler.c: $(SRCDIR)scheduler.scm $(SRCDIR)common-declarations.scm
 	$(CHICKEN) $< $(CHICKEN_LIBRARY_OPTIONS) -output-file $@ 
 profiler.c: $(SRCDIR)profiler.scm $(SRCDIR)common-declarations.scm
@@ -895,8 +883,6 @@ srfi-69.import.c: $(SRCDIR)srfi-69.import.scm
 	$(CHICKEN) $< $(CHICKEN_IMPORT_LIBRARY_OPTIONS) -output-file $@ 
 extras.import.c: $(SRCDIR)extras.import.scm
 	$(CHICKEN) $< $(CHICKEN_IMPORT_LIBRARY_OPTIONS) -output-file $@ 
-regex.import.c: $(SRCDIR)regex.import.scm
-	$(CHICKEN) $< $(CHICKEN_IMPORT_LIBRARY_OPTIONS) -output-file $@ 
 irregex.import.c: $(SRCDIR)irregex.import.scm
 	$(CHICKEN) $< $(CHICKEN_IMPORT_LIBRARY_OPTIONS) -output-file $@ 
 srfi-14.import.c: $(SRCDIR)srfi-14.import.scm
@@ -980,7 +966,7 @@ setup-download.c: $(SRCDIR)setup-download.scm setup-api.c
 distfiles: library.c eval.c expand.c chicken-syntax.c chicken-ffi-syntax.c \
 	data-structures.c ports.c files.c extras.c lolevel.c utils.c \
 	tcp.c srfi-1.c srfi-4.c srfi-13.c srfi-14.c srfi-18.c srfi-69.c \
-	posixunix.c posixwin.c regex.c scheduler.c profiler.c stub.c \
+	posixunix.c posixwin.c irregex.c scheduler.c profiler.c stub.c \
 	chicken-profile.c chicken-install.c chicken-uninstall.c chicken-status.c \
 	csc.c csi.c chicken.c batch-driver.c compiler.c optimizer.c  \
 	compiler-syntax.c scrutinizer.c unboxing.c support.c \
@@ -1023,7 +1009,7 @@ spotless: distclean testclean
 	-$(REMOVE_COMMAND) $(REMOVE_COMMAND_OPTIONS) library.c eval.c data-structures.c \
 	  ports.c files.c extras.c lolevel.c utils.c chicken-syntax.c chicken-ffi-syntax.c \
 	  tcp.c srfi-1.c srfi-4.c srfi-13.c srfi-14.c srfi-18.c srfi-69.c expand.c \
-	  posixunix.c posixwin.c regex.c scheduler.c profiler.c stub.c \
+	  posixunix.c posixwin.c irregex.c scheduler.c profiler.c stub.c \
 	  chicken-profile.c chicken-bug.c \
 	  csc.c csi.c chicken-install.c chicken-uninstall.c chicken-status.c \
 	  chicken.c batch-driver.c compiler.c optimizer.c compiler-syntax.c \
@@ -1071,11 +1057,11 @@ buildhead:
 boot-chicken:
 	$(MAKE) -f Makefile.$(PLATFORM) PLATFORM=$(PLATFORM) PREFIX=/nowhere CONFIG= \
 	  SRCDIR=$(SRCDIR) CHICKEN=$(CHICKEN) PROGRAM_SUFFIX=-boot-stage1 STATICBUILD=1 \
-	  C_COMPILER_OPTIMIZATION_OPTIONS= HACKED_APPLY= \
+	  C_COMPILER_OPTIMIZATION_OPTIONS= \
 	  confclean chicken-boot-stage1$(EXE)
 	$(MAKE) -f Makefile.$(PLATFORM) PLATFORM=$(PLATFORM) PREFIX=/nowhere CONFIG= \
-	  SRCDIR=$(SRCDIR) CHICKEN=$(PWD)/chicken-boot-stage1$(EXE) PROGRAM_SUFFIX=-boot \
-	  STATICBUILD=1 HACKED_APPLY= C_COMPILER_OPTIMIZATION_OPTIONS= \
+	  SRCDIR=$(SRCDIR) CHICKEN=$(PWD)/chicken-boot-stage1$(EXE) PROGRAM_SUFFIX=-boot STATICBUILD=1 \
+	  C_COMPILER_OPTIMIZATION_OPTIONS= \
 	  touchfiles chicken-boot$(EXE) confclean
 
 .PHONY: touchfiles
diff --git a/scripts/make-egg-index.scm b/scripts/make-egg-index.scm
index 5afb9d1a..27df8d36 100644
--- a/scripts/make-egg-index.scm
+++ b/scripts/make-egg-index.scm
@@ -2,9 +2,8 @@
 
 (load-relative "tools.scm")
 
-(use setup-download matchable sxml-transforms data-structures regex)
+(use setup-download matchable sxml-transforms data-structures irregex)
 
-(import irregex)
 
 (define *help* #f)
 (define *major-version* (##sys#fudge 41))
diff --git a/scripts/makedist.scm b/scripts/makedist.scm
index e44f234c..a9ca70d7 100644
--- a/scripts/makedist.scm
+++ b/scripts/makedist.scm
@@ -1,7 +1,7 @@
 ;;;; makedist.scm - Make distribution tarballs
 
 
-(use srfi-69)
+(use srfi-69 irregex)
 
 (define *release* #f)
 
@@ -15,7 +15,7 @@
 
 (define *platform* 
   (let ((sv (symbol->string (software-version))))
-    (cond ((string-match ".*bsd" sv) "bsd")
+    (cond ((irregex-match ".*bsd" sv) "bsd")
 	  (else
 	   (case (build-platform)
 	     ((mingw32) 
diff --git a/scripts/setversion b/scripts/setversion
index a46d7290..d751ee66 100644
--- a/scripts/setversion
+++ b/scripts/setversion
@@ -10,10 +10,6 @@ exec csi -s "$0" "$@"
 
 (define files '("README" "manual/The User's Manual"))
 
-(define-syntax rx
-  (syntax-rules ()
-    ((_ r) (force (delay (regexp r))))))
-
 (define (patch which rx subst)
   (cond ((and (list? which) (= 2 (length which)))
 	 (let ((from (car which))
@@ -26,17 +22,17 @@ exec csi -s "$0" "$@"
 		   (let loop ()
 		     (let ((ln (read-line)))
 		       (unless (eof-object? ln)
-			 (write-line (string-substitute rx subst ln #t)) 
+			 (write-line (irregex-replace/all rx ln subst))
 			 (loop) ) ) ) )
-		 binary:) )
-	     binary:)))
+		 #:binary) )
+	     #:binary)))
 	(else
 	 (let ((tmp (create-temporary-file)))
 	   (patch (list which tmp) rx subst)
 	   (system* "mv ~S ~S" tmp which) ) ) ) )
 
 (define (parse-version v)
-  (string-match (rx "(\\d+)\\.(\\d+)\\.(\\d+)(.*)") v) )
+  (string-match (irregex "(\\d+)\\.(\\d+)\\.(\\d+)(.*)") v) )
 
 (define (main args)
   (let ((major (##sys#fudge 41))
@@ -65,14 +61,14 @@ exec csi -s "$0" "$@"
       binary:)
     (system* "cat version.scm")
     (let ([vstr (sprintf "version ~A" buildversion)])
-      (for-each (cut patch <> (rx "version [0-9][-.0-9a-zA-Z]+") vstr) files) )
+      (for-each (cut patch <> (irregex "version [0-9][-.0-9a-zA-Z]+") vstr) files) )
     (patch 
      "chicken.h"
-     (rx "C_MAJOR_VERSION[ \\t]+[0-9]+")
+     (irregex "C_MAJOR_VERSION[ \\t]+[0-9]+")
      (sprintf "C_MAJOR_VERSION   ~a" major))
     (patch 
      "chicken.h"
-     (rx "C_MINOR_VERSION[ \\t]+[0-9]+")
+     (irregex "C_MINOR_VERSION[ \\t]+[0-9]+")
      (sprintf "C_MINOR_VERSION   ~a" minor))
     0))
 
diff --git a/scripts/tools.scm b/scripts/tools.scm
index b4c3d435..fe53ae59 100644
--- a/scripts/tools.scm
+++ b/scripts/tools.scm
@@ -341,15 +341,19 @@
 			       (set! debug #t) )
 			      (else (usage 1)) )
 			(loop (cdr args)) )
-		       ((string-match "([-_A-Za-z0-9]+)=(.*)" x) =>
+		       ((irregex-match "([-_A-Za-z0-9]+)=(.*)" x) =>
 			(lambda (m)
-			  (let* ((sym (string->symbol (cadr m))))
+			  (let* ((sym (string->symbol (irregex-match-substring m 1))))
 			    (if (##sys#symbol-has-toplevel-binding? sym)
 				(let ((val (##sys#slot sym 0)))
-				  (if (or (boolean? val) (string? val) (symbol? val) (eq? (void) val))
-				      (##sys#setslot sym 0 (caddr m)) 
-				      (quit "variable `~a' already has a suspicious value" sym) ) )
-				(##sys#setslot sym 0 (caddr m)) )
+				  (if (or (boolean? val)
+					  (string? val)
+					  (symbol? val)
+					  (eq? (void) val))
+				      (##sys#setslot sym 0 (irregex-match-substring m 2)) 
+				      (quit "variable `~a' already has a suspicious value" 
+					    sym) ) )
+				(##sys#setslot sym 0 (irregex-match-substring m 2)) )
 			    (loop (cdr args)) ) ) )
 		       (else
 			(set! targets (cons x targets))
@@ -424,22 +428,24 @@
 	 val)))
   (let loop ((args args) (vals '()))
     (cond ((null? args) (reverse vals))
-	  ((string-match "(-{1,2})([-_A-Za-z0-9]+)(=)?\\s*(.+)?" (car args)) 
+	  ((irregex-match "(-{1,2})([-_A-Za-z0-9]+)(=)?\\s*(.+)?" (car args)) 
 	   =>
 	   (lambda (m)
 	     (let*-values (((next) (cdr args))
 			   ((var val)
-			    (cond ((equal? "=" (fourth m))
-				   (let ((opt (third m))
-					 (val (fifth m)))
+			    (cond ((equal? "=" (irregex-match-substring m 3))
+				   (let ((opt (irregex-match-substring m 2))
+					 (val (irregex-match-substring m 4)))
 				     (cond (val (values opt val))
 					   (else 
 					    (when (null? next)
-					      (error "missing argument for option" (car args)) )
+					      (error "missing argument for option" 
+						     (car args)) )
 					    (let ((x (car next)))
 					      (set! next (cdr next))
 					      (values opt x))))) )
-				  ((string? (second m)) (values (third m) #t))
+				  ((string? (irregex-match-substring m 1))
+				   (values (irregex-match-substring m 2) #t))
 				  (else (values #f #f)) ) ) )
 	       (cond (var 
 		      (assign var val)
diff --git a/setup-api.scm b/setup-api.scm
index 9c14871a..9ef3635c 100644
--- a/setup-api.scm
+++ b/setup-api.scm
@@ -24,7 +24,7 @@
 ; POSSIBILITY OF SUCH DAMAGE.
 
 
-(require-library srfi-1 regex utils posix srfi-13 extras ports data-structures files)
+(require-library srfi-1 irregex utils posix srfi-13 extras ports data-structures files)
 
 ; This code is partially quite messy and the API is not overly consistent,
 ; mainly because it has grown "organically" while the old chicken-setup program
@@ -67,7 +67,7 @@
      setup-error-handling)
   
   (import scheme chicken foreign
-	  regex utils posix ports extras data-structures
+	  irregex utils posix ports extras data-structures
 	  srfi-1 srfi-13 files)
 
 ;;; Constants, variables and parameters
@@ -197,7 +197,7 @@
 	     (let loop ()
 	       (let ((ln (read-line)))
 		 (unless (eof-object? ln)
-		   (write-line (string-substitute rx subst ln #t)) 
+		   (write-line (irregex-replace/all rx ln subst))
 		   (loop) ) ) ) ) ) ) )
       (let ((tmp (create-temporary-file)))
 	(patch (list tmp tmp) rx subst)
@@ -728,7 +728,7 @@
 (define (version>=? v1 v2)
   (define (version->list v)
     (map (lambda (x) (or (string->number x) x))
-	 (string-split-fields "[-\\._]" (->string v) #:infix)))
+	 (irregex-split "[-\\._]" (->string v))))
   (let loop ((p1 (version->list v1))
 	     (p2 (version->list v2)))
     (cond ((null? p1) (null? p2))
diff --git a/setup-download.scm b/setup-download.scm
index 2973a42d..9cd057c6 100644
--- a/setup-download.scm
+++ b/setup-download.scm
@@ -24,7 +24,7 @@
 ; POSSIBILITY OF SUCH DAMAGE.
 
 
-(require-library extras regex posix utils setup-api srfi-1 data-structures tcp srfi-13
+(require-library extras irregex posix utils setup-api srfi-1 data-structures tcp srfi-13
 		 files)
 
 
@@ -38,11 +38,13 @@
 			temporary-directory)
 
   (import scheme chicken)
-  (import extras regex posix utils srfi-1 data-structures tcp srfi-13 files setup-api)
+  (import extras irregex posix utils srfi-1 data-structures tcp srfi-13 files setup-api)
 
   (define-constant +default-tcp-connect-timeout+ 10000) ; 10 seconds
   (define-constant +default-tcp-read/write-timeout+ 20000) ; 20 seconds
 
+  (define-constant +url-regex+ "(http://)?([^/:]+)(:([^:/]+))?(/.+)")
+
   (tcp-connect-timeout +default-tcp-connect-timeout+)
   (tcp-read-timeout +default-tcp-read/write-timeout+)
   (tcp-write-timeout +default-tcp-read/write-timeout+)
@@ -157,7 +159,9 @@
 	     [tagver (existing-version
 	              egg version
 	              (filter-map
-                       (lambda (f) (and-let* ((m (string-search "^tags/([^/]+)/" f))) (cadr m)))
+                       (lambda (f) 
+			 (and-let* ((m (irregex-search "^tags/([^/]+)/" f))) 
+			   (irregex-match-substring m 1)))
                        files))])
 	(let-values ([(filedir ver)
 	              (if tagver
@@ -189,14 +193,15 @@
     (conc dir #\/ egg ".meta"))
 
   (define (deconstruct-url url)
-    (let ([m (string-match "(http://)?([^/:]+)(:([^:/]+))?(/.+)" url)])
+    (let ([m (irregex-match +url-regex+ url)])
       (values
-       (if m (caddr m) url)
-       (if (and m (cadddr m))
-	   (or (string->number (list-ref m 4))
-	       (error "not a valid port" (list-ref m 4)))
+       (if m (irregex-match-substring m 2) url)
+       (if (and m (irregex-match-substring m 3))
+	   (let ((port (irregex-match-substring m 4)))
+	     (or (string->number port)
+		 (error "not a valid port" port)))
 	   80)
-       (if m (list-ref m 5) "/")) ) )
+       (if m (irregex-match-substring m 5) "/")) ) )
 
   (define (locate-egg/http egg url #!optional version destination tests
 			   proxy-host proxy-port)
@@ -245,13 +250,13 @@
 
   (define (match-http-response rsp)
     (and (string? rsp)
-         (string-match "HTTP/[0-9.]+\\s+([0-9]+)\\s+.*" rsp)) )
+         (irregex-match "HTTP/[0-9.]+\\s+([0-9]+)\\s+.*" rsp)) )
 
   (define (response-match-code? mrsp code)
-    (and mrsp (string=? (number->string code) (cadr mrsp))) )
+    (and mrsp (string=? (number->string code) (irregex-match-substring mrsp 1))) )
 
   (define (match-chunked-transfer-encoding ln)
-    (string-match "[Tt]ransfer-[Ee]ncoding:\\s*chunked.*" ln) )
+    (irregex-match "[Tt]ransfer-[Ee]ncoding:\\s*chunked.*" ln) )
 
   (define (http-fetch host port locn dest proxy-host proxy-port)
     (d "connecting to host ~s, port ~a ~a...~%" host port
diff --git a/setup.defaults b/setup.defaults
index 4bf7ab32..604980c1 100644
--- a/setup.defaults
+++ b/setup.defaults
@@ -24,7 +24,7 @@
 (map
  (data-structures 
   extras files foreign irregex lolevel ports tcp utils
-  posix regex setup-api setup-download
+  posix irregex setup-api setup-download
   srfi-1 srfi-4 srfi-13 srfi-14 srfi-18 srfi-69 
   ->) )
 
diff --git a/tests/re-tests.txt b/tests/re-tests.txt
index a73604c6..3b7bf976 100644
--- a/tests/re-tests.txt
+++ b/tests/re-tests.txt
@@ -104,6 +104,9 @@ a[bcd]*dcdcde	adcdcde	y	&	adcdcde
 a[bcd]+dcdcde	adcdcde	n	-	-
 (ab|a)b*c	abc	y	&-\1	abc-ab
 ((a)(b)c)(d)	abcd	y	\1-\2-\3-\4	abc-a-b-d
+((a)(b)?c)(d)	abcd	y	\1-\2-\3-\4	abc-a-b-d
+((a)(b)?c)(d)	acd	y	\1-\2-\3-\4	ac-a--d
+((aa)(bb)?cc)(dd)	aaccdd	y	\1-\2-\3-\4	aacc-aa--dd
 [ -~]*	abc	y	&	abc
 [ -~ -~]*	abc	y	&	abc
 [ -~ -~ -~]*	abc	y	&	abc
@@ -118,8 +121,13 @@ a[bcd]+dcdcde	adcdcde	n	-	-
 (bc+d$|ef*g.|h?i(j|k))	effg	n	-	-
 (bc+d$|ef*g.|h?i(j|k))	bcdd	n	-	-
 (bc+d$|ef*g.|h?i(j|k))	reffgz	y	&-\1-\2	effgz-effgz-
+((((((((((a)))))))))	-	c	-	-
+((((((((((a))))))))))	a	y	&-\10	a-a
 (((((((((a)))))))))	a	y	&	a
 multiple words of text	uh-uh	n	-	-
 multiple words	multiple words, yeah	y	&	multiple words
 (.*)c(.*)	abcde	y	&-\1-\2	abcde-ab-de
 \((.*), (.*)\)	(a, b)	y	(\2, \1)	(b, a)
+(we|wee|week)(knights|night)	weeknights	y	&-\1-\2	weeknights-wee-knights
+(a([^a])*)*	abcaBC	y	&-\1-\2	abcaBC-aBC-C
+([Aa]b).*\1	abxyzab	y	&-\1	abxyzab-ab
diff --git a/tests/runtests.sh b/tests/runtests.sh
index e2c2eab6..ae147777 100644
--- a/tests/runtests.sh
+++ b/tests/runtests.sh
@@ -20,7 +20,7 @@ for x in setup-api.so setup-api.import.so setup-download.so \
       srfi-1.import.so srfi-4.import.so data-structures.import.so \
       ports.import.so files.import.so posix.import.so \
       srfi-13.import.so srfi-69.import.so extras.import.so \
-      regex.import.so srfi-14.import.so tcp.import.so \
+      irregex.import.so srfi-14.import.so tcp.import.so \
       foreign.import.so scheme.import.so srfi-18.import.so \
       utils.import.so csi.import.so irregex.import.so types.db; do
   cp ../$x test-repository
diff --git a/tests/sgrep.scm b/tests/sgrep.scm
index 7503256a..555829e4 100644
--- a/tests/sgrep.scm
+++ b/tests/sgrep.scm
@@ -1,7 +1,7 @@
 ;;;; sgrep.scm - grepping benchmark
 
 
-(use regex extras utils posix srfi-1)
+(use irregex extras utils posix srfi-1)
 
 
 (define big-string
@@ -21,7 +21,7 @@
 	       (lambda (line)
 		 (set! c (fx+ c 1))
 		 ;(when (zero? (fxmod c 500)) (print* "."))
-		 (when (string-search expr line)
+		 (when (irregex-search expr line)
 		   (set! h (fx+ h 1)))
 		 #f))
 	      ;(newline)
diff --git a/tests/test-glob.scm b/tests/test-glob.scm
index a5732384..91fc3d64 100644
--- a/tests/test-glob.scm
+++ b/tests/test-glob.scm
@@ -1,20 +1,20 @@
 ;;;; test-glob.scm - test glob-pattern -> regex translation
 
 
-(use regex)
+(use irregex)
 
-(assert (string-match (glob->regexp "foo.bar") "foo.bar"))
-(assert (string-match (glob->regexp "foo*") "foo.bar"))
-(assert (string-match (glob->regexp "foo/*") "foo/bar"))
-(assert (not (string-match (glob->regexp "foo/*") "foo/bar/baz")))
-(assert (string-match (glob->regexp "foo/*/*") "foo/bar/baz"))
-(assert (not (string-match (glob->regexp "foo/*") "foo/.bar")))
-(assert (string-match (glob->regexp "*foo") "xyzfoo"))
-(assert (not (string-match (glob->regexp "*foo") ".foo")))
-(assert (not (string-match (glob->regexp "*foo*") "a.fooxxx/yyy")))
-(assert (string-match (glob->regexp "*foo*") "fooxxx"))
-(assert (string-match (glob->regexp "main.[ch]") "main.c"))
-(assert (string-match (glob->regexp "main.[ch]") "main.h"))
-(assert (not (string-match (glob->regexp "main.[ch]") "main.cpp")))
-(assert (string-match (glob->regexp "main.[-c]") "main.h"))
-(assert (not (string-match (glob->regexp "main.[-h]") "main.h")))
+(assert (irregex-match (##sys#glob->regexp "foo.bar") "foo.bar"))
+(assert (irregex-match (##sys#glob->regexp "foo*") "foo.bar"))
+(assert (irregex-match (##sys#glob->regexp "foo/*") "foo/bar"))
+(assert (not (irregex-match (##sys#glob->regexp "foo/*") "foo/bar/baz")))
+(assert (irregex-match (##sys#glob->regexp "foo/*/*") "foo/bar/baz"))
+(assert (not (irregex-match (##sys#glob->regexp "foo/*") "foo/.bar")))
+(assert (irregex-match (##sys#glob->regexp "*foo") "xyzfoo"))
+(assert (not (irregex-match (##sys#glob->regexp "*foo") ".foo")))
+(assert (not (irregex-match (##sys#glob->regexp "*foo*") "a.fooxxx/yyy")))
+(assert (irregex-match (##sys#glob->regexp "*foo*") "fooxxx"))
+(assert (irregex-match (##sys#glob->regexp "main.[ch]") "main.c"))
+(assert (irregex-match (##sys#glob->regexp "main.[ch]") "main.h"))
+(assert (not (irregex-match (##sys#glob->regexp "main.[ch]") "main.cpp")))
+(assert (irregex-match (##sys#glob->regexp "main.[-c]") "main.h"))
+(assert (not (irregex-match (##sys#glob->regexp "main.[-h]") "main.h")))
diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm
index 12d49adf..917e6e6a 100644
--- a/tests/test-irregex.scm
+++ b/tests/test-irregex.scm
@@ -1,15 +1,13 @@
 ;;;: test-irregex.scm
 
 
-(use extras regex)
+(use extras irregex)
 
 (include "test.scm")
 
-(import irregex)
-
 (define (subst-matches matches subst)
   (define (submatch n)
-    (if (vector? matches)
+    (if (irregex-match-data? matches)
         (irregex-match-substring matches n)
         (list-ref matches n)))
   (and
@@ -28,35 +26,41 @@
                    ((#\\)
                     (let ((c (read-char in)))
                       (if (char-numeric? c)
-                          (display
-                           (or (submatch (string->number (string c))) "")
-                           out)
+                          (let lp ((res (list c)))
+                            (if (and (char? (peek-char in))
+                                     (char-numeric? (peek-char in)))
+                                (lp (cons (read-char in) res))
+                                (display
+                                 (or (submatch (string->number
+                                                (list->string (reverse res))))
+                                     "")
+                                 out)))
                           (write-char c out))))
                    (else
                     (write-char c out)))
                  (lp)))))))))))
 
 (define (test-re matcher line)
-  (apply 
-   (lambda (pattern input result subst output)
-     (let ((name (sprintf "~A  ~A  ~A" pattern input result)))
-       (cond
-	((equal? "c" result)
-	 (test-error name (matcher pattern input)))
-	((equal? "n" result)
-	 (test-assert name (not (matcher pattern input))))
-	((equal? "y" result)
-	 (test-assert name (matcher pattern input)))
-	(else
-	 (test-equal name
-		     (subst-matches (matcher pattern input) subst)
-		     result)))))
-   (string-split line "\t" #t)))
-
+  (let ((splt (string-split line "\t" #t)))
+    (if (list? splt)
+	(apply
+	 (lambda (pattern input result subst output)
+	   (let ((name (sprintf "~A  ~A  ~A  ~A" pattern input result subst)))
+	     (cond
+	      ((equal? "c" result)
+	       (test-error name (matcher pattern input)))
+	      ((equal? "n" result)
+	       (test-assert name (not (matcher pattern input))))
+	      (else
+	       (test-equal name output
+		     (subst-matches (matcher pattern input) subst))))))
+	 splt)
+	(warning "invalid regex test line" line))))
 
 (test-begin)
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; basic irregex
 
 (for-each
  (lambda (opts)
@@ -69,9 +73,139 @@
                        (irregex-search (apply irregex pat opts) str))
                      line))
           read-line)))))
- '((small) (fast)))
+ '((backtrack)
+   (fast)
+   ))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; chunked irregex
+
+(define (rope . args)
+  (map (lambda (x) (if (pair? x) x (list x 0 (string-length x)))) args))
+
+(define rope-chunker
+  (make-irregex-chunker
+   (lambda (x) (and (pair? (cdr x)) (cdr x)))
+   caar
+   cadar
+   caddar
+   (lambda (src1 i src2 j)
+     (if (eq? src1 src2)
+         (substring (caar src1) i j)
+         (let lp ((src (cdr src1))
+                  (res (list (substring (caar src1) i (caddar src1)))))
+           (if (eq? src src2)
+               (string-intersperse
+                (reverse (cons (substring (caar src2) (cadar src2) j) res))
+                "")
+               (lp (cdr src)
+                   (cons (substring (caar src) (cadar src) (caddar src))
+                         res))))))))
+
+(define (make-ropes str)
+  (let ((len (string-length str)))
+    (case len
+      ((0 1)
+       (list (rope str)))
+      ((2)
+       (list (rope str)
+             (rope (substring str 0 1) (substring str 1 2))))
+      ((3)
+       (list (rope str)
+             (rope (substring str 0 1) (substring str 1 3))
+             (rope (substring str 0 2) (substring str 2 3))
+             (rope (substring str 0 1)
+                   (substring str 1 2)
+                   (substring str 2 3))))
+      (else
+       (let ((mid (quotient (+ len 1) 2)))
+         (list (rope str)
+               (rope (substring str 0 1) (substring str 1 len))
+               (rope (substring str 0 mid) (substring str mid len))
+               (rope (substring str 0 (- len 1))
+                     (substring str (- len 1) len))
+               (rope (substring str 0 1)
+                     (substring str 1 mid)
+                     (substring str mid len))
+               ))))))
+
+(define (make-shared-ropes str)
+  (let ((len (string-length str)))
+    (case len
+      ((0 1)
+       '())
+      ((2)
+       (list (list (list str 0 1) (list str 1 2))))
+      ((3)
+       (list (list (list str 0 1) (list str 1 3))
+             (list (list str 0 2) (list str 2 3))
+             (list (list str 0 1) (list str 1 2) (list str 2 3))))
+      (else
+       (let ((mid (quotient (+ len 1) 2)))
+         (list (list (list str 0 1) (list str 1 len))
+               (list (list str 0 mid) (list str mid len))
+               (list (list str 0 (- len 1))
+                     (list str (- len 1) len))
+               (list (list str 0 1) (list str 1 mid) (list str mid len))
+               ))))))
+
+(for-each
+ (lambda (opts)
+   (test-group
+    (sprintf "irregex/chunked - ~S" opts)
+    (with-input-from-file "re-tests.txt"
+      (lambda ()
+	(port-for-each
+	 (lambda (line)
+	   (let ((splt (string-split line "\t" #t)))
+	     (if (list? splt)
+		 (apply 
+		  (lambda (pattern input result subst output)
+		    (let ((name
+			   (sprintf "~A  ~A  ~A  ~A" pattern input result subst)))
+		      (cond
+		       ((equal? "c" result))
+		       ((equal? "n" result)
+			(for-each
+			 (lambda (rope)
+			   (test-assert name
+					(not (irregex-search/chunked pattern
+								     rope-chunker
+								     rope))))
+			 (append (make-ropes input)
+				 (make-shared-ropes input))))
+		       (else
+			(for-each
+			 (lambda (rope)
+			   (test-equal
+			    name output
+			    (subst-matches (irregex-search/chunked pattern
+								   rope-chunker
+								   rope)
+					   subst)))
+			 (append (make-ropes input)
+				 (make-shared-ropes input)))))))
+		  splt)
+		 (warning "invalid regex test line" line))))
+	 read-line)))))
+ '((backtrack)
+   (fast)
+   ))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; pregexp
+
+'(test-group "pregexp"
+   (with-input-from-file "re-tests.txt"
+     (lambda ()
+       (port-for-each
+        (lambda (line) (test-re pregexp-match line))
+        read-line))))
 
-(test-group "regex"
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; default regex (PCRE)
+
+'(test-group "regex"
    (with-input-from-file "re-tests.txt"
      (lambda ()
        (port-for-each
@@ -80,14 +214,238 @@
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
+(test-group "unmatchable patterns"
+  (test-assert (not (irregex-search '(or) "abc")))
+  (test-assert (not (irregex-search '(: "ab" (or)) "abc")))
+  (test-assert (not (irregex-search '(submatch "ab" (or)) "abc")))
+  (test-assert (not (irregex-search '(: "ab" (submatch (or))) "abc")))
+  (test-assert (not (irregex-search '(/) "abc")))
+  (test-assert (not (irregex-search '(: "ab" (/)) "abc")))
+  (test-assert (not (irregex-search '(~ any) "abc")))
+  (test-assert (not (irregex-search '(: "ab" (~ any)) "abc")))
+  (test-assert (not (irregex-search '("") "abc")))
+  (test-assert (not (irregex-search '(: "ab" ("")) "abc")))
+  )
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(test-group "beginning/end of chunks"
+  (test-assert
+      (irregex-search/chunked '(: bos "foo") rope-chunker '((" foo" 0 4)) 1))
+  (test-assert
+      (irregex-search/chunked '(: bos "foo") rope-chunker '(("  foo" 1 5)) 2))
+  (test-assert
+      (irregex-search/chunked '(: bos "foo" eos) rope-chunker '((" foo" 1 4)) 1))
+  (test-assert
+      (irregex-search/chunked '(: bos "foo" eos) rope-chunker '(("  foo" 2 5)) 2))
+  (test-assert
+      (irregex-search/chunked '(: bos "foo" eos) rope-chunker '((" foo" 0 4)) 1))
+  (test-assert
+      (irregex-search/chunked '(: bos "foo" eos) rope-chunker '(("  foo" 1 5)) 2))
+  )
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(test-group "API"
+  (test-assert (irregex? (irregex "a.*b")))
+  (test-assert (irregex? (irregex '(: "a" (* any) "b"))))
+  (test-assert (not (irregex? (vector '*irregex-tag* #f #f #f #f #f #f #f))))
+  (test-assert (not (irregex? (vector #f #f #f #f #f #f #f #f #f))))
+  (test-assert (irregex-match-data? (irregex-search "a.*b" "axxxb")))
+  (test-assert (irregex-match-data? (irregex-match "a.*b" "axxxb")))
+  (test-assert (not (irregex-match-data? (vector '*irregex-match-tag* #f #f #f #f #f #f #f #f #f))))
+  (test-assert (not (irregex-match-data? (vector #f #f #f #f #f #f #f #f #f #f #f))))
+  (test-equal 0 (irregex-num-submatches (irregex "a.*b")))
+  (test-equal 1 (irregex-num-submatches (irregex "a(.*)b")))
+  (test-equal 2 (irregex-num-submatches (irregex "(a(.*))b")))
+  (test-equal 2 (irregex-num-submatches (irregex "a(.*)(b)")))
+  (test-equal 10 (irregex-num-submatches (irregex "((((((((((a))))))))))")))
+  (test-equal 0 (irregex-match-num-submatches (irregex-search "a.*b" "axxxb")))
+  (test-equal 1 (irregex-match-num-submatches (irregex-search "a(.*)b" "axxxb")))
+  (test-equal 2 (irregex-match-num-submatches (irregex-search "(a(.*))b" "axxxb")))
+  (test-equal 2 (irregex-match-num-submatches (irregex-search "a(.*)(b)" "axxxb")))
+  (test-equal 10 (irregex-match-num-submatches (irregex-search "((((((((((a))))))))))" "a")))
+  (test-assert
+   (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") 0))
+  (test-assert
+   (not (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") 1)))
+  (test-assert
+   (not (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") -1)))
+  (test-assert
+   (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 0))
+  (test-assert
+   (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 1))
+  (test-assert
+   (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 2))
+  (test-assert
+   (not (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 3)))
+  (test-equal 1 (irregex-match-start-index (irregex-search "a(.*)(b)" "axxxb") 1))
+  (test-equal 4 (irregex-match-end-index (irregex-search "a(.*)(b)" "axxxb") 1))
+  )
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
 (test-group "utils"
-  (test-equal "replace" 
-      (irregex-replace "[aeiou]" "hello world" "*")
-      "h*llo world")
-  (test-equal "replace/all"
-      (irregex-replace/all "[aeiou]" "hello world" "*")
-      "h*ll* w*rld"))
+  (test-equal "h*llo world"
+      (irregex-replace "[aeiou]" "hello world" "*"))
+  (test-equal "h*ll* w*rld"
+      (irregex-replace/all "[aeiou]" "hello world" "*"))
+  (test-equal '("bob@test.com" "fred@example.com")
+      (irregex-fold 'email
+                    (lambda (i m s) (cons (irregex-match-substring m) s))
+                    '()
+                    "bob@test.com and fred@example.com"
+                    (lambda (i s) (reverse s))))
+  (test-equal '("bob@test.com" "fred@example.com")
+      (irregex-fold/chunked
+       'email
+       (lambda (src i m s) (cons (irregex-match-substring m) s))
+       '()
+       rope-chunker
+       (rope "bob@test.com and fred@example.com")
+       (lambda (src i s) (reverse s))))
+  )
+
+
+(define (extract name irx str)
+  (irregex-match-substring (irregex-match irx str) name))
+(define (valid? name irx str)
+  (irregex-match-valid-index? (irregex-match irx str) name))
+(define (start-idx name irx str)
+  (irregex-match-start-index (irregex-match irx str) name))
+(define (end-idx name irx str)
+  (irregex-match-end-index (irregex-match irx str) name))
+
+(test-group "named submatches"
+  (test-equal "matching submatch is seen and extracted"
+              "first" (extract 'first `(or (submatch-named first "first")
+                                           (submatch-named second "second"))
+                               "first"))
+  (test-assert "matching submatch index is valid"
+               (valid? 'first `(or (submatch-named first "first")
+                                   (submatch-named second "second"))
+                       "first"))
+  (test-equal "nonmatching submatch is known but returns false"
+              #f
+              (extract 'second `(or (submatch-named first "first")
+                                    (submatch-named second "second"))
+                       "first"))
+  (test-assert "nonmatching submatch index is valid"
+               (valid? 'second `(or (submatch-named first "first")
+                                    (submatch-named second "second"))
+                       "first"))
+  (test-error "nonexisting submatch is unknown and raises an error"
+              (extract 'third `(or (submatch-named first "first")
+                                   (submatch-named second "second"))
+                       "first"))
+  (test-assert "nonexisting submatch index is invalid"
+               (not (valid? 'third `(or (submatch-named first "first")
+                                         (submatch-named second "second"))
+                            "first")))
+  (test-equal "matching alternative is used"
+              "first" (extract 'sub `(or (submatch-named sub "first")
+                                         (submatch-named sub "second"))
+                               "first"))
+  (test-equal "matching alternative is used (second match)"
+              "second" (extract 'sub `(or (submatch-named sub "first")
+                                          (submatch-named sub "second"))
+                                "second"))
+  (test-equal "last match is used with multiple matches for a name"
+              "second" (extract 'sub `(seq (submatch-named sub "first")
+                                           space
+                                           (submatch-named sub "second"))
+                                "first second"))
+  (test-equal "submatch start"
+              1
+              (start-idx 'xs `(seq "a" (submatch-named xs (+ "x")) "b") "axxxb"))
+  (test-error "unknown submatch start"
+              (start-idx 'xs `(seq "a" (submatch-named ys (+ "x")) "b") "axxxb"))
+  (test-equal "submatch end"
+              4 (end-idx 'xs `(seq "a" (submatch-named xs (+ "x")) "b") "axxxb"))
+  (test-error "unknown submatch start"
+              (end-idx 'xs `(seq "a" (submatch-named ys (+ "x")) "b") "axxxb")))
+
+;; This is here to help optimized implementations catch segfaults and
+;; other such problems.  These calls will always return errors in plain
+;; Scheme, but only because it will try to use the invalid object in a
+;; way that's not supported by the operator.  Once Scheme grows a
+;; standardized way of signaling and catching exceptions, these tests
+;; should be changed and expanded to check for specific condition types,
+;; and probably moved to the group where the procedure is being tested.
+(test-group "error handling"
+  (test-error (irregex 'invalid-sre))
+  (test-error (string->irregex 'not-a-string))
+  (test-error (sre->irregex 'invalid-sre))
+  
+  (test-error (irregex-search 'not-an-irx-or-sre "foo"))
+  (test-error (irregex-search "foo" 'not-a-string))
+  (test-error (irregex-search "foo" "foo" 'not-a-number))
+  (test-error (irregex-search "foo" "foo" 0 'not-a-number))
+
+  ;; TODO: irregex-new-matches, irregex-reset-matches!
+  ;; irregex-search/matches, make-irregex-chunker?
+
+  (test-error (irregex-match-valid-index? 'not-a-match-object 0))
+  (test-error (irregex-match-start-index 'not-a-match-object 0))
+  (test-error (irregex-match-start-index (irregex-search "foo" "foo") -1))
+  (test-error (irregex-match-end-index 'not-a-match-object 0))
+  (test-error (irregex-match-end-index (irregex-search "foo" "foo") -1))
+  
+  (test-error (irregex-match-start-chunk 'not-a-match-object 0))
+  (test-error (irregex-match-end-chunk 'not-a-match-object 0))
+  (test-error (irregex-match-substring 'not-a-match-object 0))
+  (test-error (irregex-match-subchunk 'not-a-match-object 0))
+  (test-error (irregex-match-num-submatches 'not-a-match-object))
+  (test-error (irregex-match-names 'not-a-match-object))
+  (test-error (irregex-num-submatches 'not-an-irx))
+  (test-error (irregex-names 'not-an-irx))
+  
+  (test-error (irregex-fold 'not-an-irx (lambda x x) 0 "foo" (lambda x x) 0 3))
+  (test-error (irregex-fold "foo" 'not-a-proc 0 "foo" (lambda x x) 0 3))
+  (test-error (irregex-fold "foo" (lambda (a b) b) 0 'not-a-string
+                            (lambda x x) 0 3))
+  (test-error (irregex-fold "foo" (lambda (a b) b) 0 "foo" 'not-a-proc 0 3))
+  (test-error (irregex-fold "foo" (lambda (a b) b) 0 "foo" (lambda x x)
+                            'not-a-number 3))
+  (test-error (irregex-fold "foo" (lambda (a b) b) 0 "foo" (lambda x x) 0
+                            'not-a-number))
+
+  (test-error (irregex-replace 'not-an-irx "str"))
+  (test-error (irregex-replace "foo" "foo" (lambda (x) 'not-a-string)))
+  (test-error (irregex-replace/all 'not-an-irx "str"))
+  (test-error (irregex-replace/all "foo" "foo" (lambda (x) 'not-a-string)))
+
+  ;; Are these supposed to be exported?
+  ;; irregex-nfa, irregex-dfa, irregex-dfa/search, irregex-dfa/extract
+  ;; irregex-flags, irregex-lengths
+  )
+
+(test-end)
+
+
+;;; UTF-8 tests
+
+(test-begin)
+
+(test-assert (irregex-search "(?u:<..>)" "<漢字>"))
+(test-assert (irregex-search "(?u:<.*>)" "<漢字>"))
+(test-assert (irregex-search "(?u:<.+>)" "<漢字>"))
+(test-assert (not (irregex-search "(?u:<.>)" "<漢字>")))
+(test-assert (not (irregex-search "(?u:<...>)" "<漢>")))
+
+(test-assert (irregex-search "(?u:<[^a-z]*>)" "<漢字>"))
+(test-assert (not (irregex-search "(?u:<[^a-z]*>)" "<漢m字>")))
+(test-assert (irregex-search "(?u:<[^a-z][^a-z]>)" "<漢字>"))
+(test-assert (irregex-search "(?u:<あ*>)" "<あ>"))
+(test-assert (irregex-search "(?u:<あ*>)" "<ああ>"))
+(test-assert (not (irregex-search "(?u:<あ*>)" "<あxあ>")))
+
+(test-assert (irregex-search "(?u:<[あ-ん]*>)" "<あん>"))
+(test-assert (irregex-search "(?u:<[あ-ん]*>)" "<ひらがな>"))
+(test-assert (not (irregex-search "(?u:<[あ-ん]*>)" "<ひらgがな>")))
 
 (test-end)
+
+
 (test-exit)
 
diff --git a/tests/test.scm b/tests/test.scm
index e9b43c14..c16de6a5 100644
--- a/tests/test.scm
+++ b/tests/test.scm
@@ -77,7 +77,8 @@
 (define-syntax test-equal
   (syntax-rules ()
     ((_ name expr value eq) (run-equal name (lambda () expr) value eq))
-    ((_ name expr value) (run-equal name (lambda () expr) value equal?))))
+    ((_ name expr value) (run-equal name (lambda () expr) value equal?))
+    ((_ expr value) (run-equal (->string value) (lambda () expr) value equal?))))
 
 (define-syntax test-error
   (syntax-rules ()
@@ -89,7 +90,8 @@
 
 (define-syntax test-assert
   (syntax-rules ()
-    ((_ name expr) (run-equal name (lambda () (if expr #t #f)) #t eq?))))
+    ((_ name expr) (run-equal name (lambda () (if expr #t #f)) #t eq?))
+    ((_ expr) (run-equal (->string expr) (lambda () (if expr #t #f)) #t eq?))))
 
 (define-syntax test-group
   (syntax-rules ()
diff --git a/types.db b/types.db
index f28c72a3..c269ce1a 100644
--- a/types.db
+++ b/types.db
@@ -538,30 +538,59 @@
 ;; irregex
 
 (irregex (procedure irregex (#!rest) *))
-(string->irregex (procedure string->irregex (string #!rest) *))
-(sre->irregex (procedure sre->irregex (#!rest) *))
-(string->sre (procedure string->sre (string #!rest) *))
-(irregex? (procedure irregex? (*) boolean))
+;irregex-apply-match
+(irregex-dfa (procedure irregex-dfa (*) *))
+(irregex-dfa/extract (procedure irregex-dfa/extract (*) *))
+(irregex-dfa/search (procedure irregex-dfa/search (*) *))
+(irregex-extract (procedure irregex-extract (* string #!optional fixnum fixnum) list))
+(irregex-flags (procedure irregex-flags (*) *))
+(irregex-fold (procedure irregex-fold (* (procedure (fixnum (struct regexp-match)) *) * string #!optional (procedure (fixnum *) *) fixnum fixnum) *))
+(irregex-fold/chunked (procedure irregex-fold/chunked (* (procedure (fixnum (struct regexp-match)) *) * procedure * #!optional (procedure (fixnum *) *) fixnum fixnum) *))
+(irregex-lengths (procedure irregex-lengths (*) *))
+(irregex-match (procedure irregex-match (* string) *))
+;irregex-match?
 (irregex-match-data? (procedure irregex-match-data? (*) boolean))
-(irregex-new-matches (procedure irregex-new-matches (*) *))
-(irregex-reset-matches! (procedure irregex-reset-matches! (*) *))
-(irregex-match-start (procedure irregex-match-start (* #!optional *) *))
 (irregex-match-end (procedure irregex-match-end (* #!optional *) *))
-(irregex-match-substring (procedure irregex-match-substring (* #!optional *) *))
-(irregex-search (procedure irregex-search (* string #!optional fixnum fixnum) *))
-(irregex-search/matches (procedure irregex-search/matches (* string fixnum fixnum *) *))
-(irregex-match (procedure irregex-match (* string) *))
+;irregex-match-end-chunk
+(irregex-match-end-index (procedure irregex-match-end-index ((struct regexp-match) #!optional *) fixnum))
+(irregex-match-names (procedure irregex-match-names ((struct regexp-match)) list))
+(irregex-match-num-submatches (procedure irregex-match-num-submatches ((struct regexp-match)) fixnum))
+(irregex-match-start (procedure irregex-match-start (* #!optional *) *))
+;irregex-match-start-chunk
+(irregex-match-start-index (procedure irregex-match-start-index ((struct regexp-match) #!optional *) fixnum))
 (irregex-match-string (procedure irregex-match-string (*) *))
+(irregex-match-subchunk (procedure irregex-match-subchunk ((struct regexp-match) #!optional *) *))
+(irregex-match-substring (procedure irregex-match-substring (* #!optional *) *))
+(irregex-match/chunked (procedure irregex-match/chunked (* * * #!optional fixnum) *))
+(irregex-names (procedure irregex-names (*) *))
+(irregex-new-matches (procedure irregex-new-matches (*) *))
+(irregex-nfa (procedure irregex-nfa (*) *))
+(irregex-num-submatches (procedure irregex-num-submatches (*) fixnum))
+(irregex-opt (procedure irregex-opt (list) *))
+(irregex-quote (procedure irregex-quote (string) string))
 (irregex-replace (procedure irregex-replace (* string #!rest) *))
 (irregex-replace/all (procedure irregex-replace/all (* string #!rest) *))
-(irregex-dfa (procedure irregex-dfa (*) *))
-(irregex-dfa/search (procedure irregex-dfa/search (*) *))
-(irregex-dfa/extract (procedure irregex-dfa/extract (*) *))
-(irregex-nfa (procedure irregex-nfa (*) *))
-(irregex-flags (procedure irregex-flags (*) *))
+(irregex-reset-matches! (procedure irregex-reset-matches! (*) *))
+(irregex-search (procedure irregex-search (* string #!optional fixnum fixnum) *))
+(irregex-search/matches (procedure irregex-search/matches (* string fixnum fixnum *) *))
+(irregex-split (procedure irregex-split (* string #!optional fixnum fixnum) list))
 (irregex-submatches (procedure irregex-submatches (*) *))
-(irregex-lengths (procedure irregex-lengths (*) *))
-(irregex-names (procedure irregex-names (*) *))
+(irregex-match-valid-index? irregex-match-valid-index? ((struct regexp-match) *) boolean)
+(irregex? (procedure irregex? (*) boolean))
+(make-irregex-chunker
+ (procedure make-irregex-chunker 
+	    ((procedure (*) *)
+	     (procedure (*) *)
+	     #!optional
+	     (procedure (*) *)
+	     (procedure (*) *)
+	     (procedure (* fixnum * fixnum) string)
+	     (procedure (* fixnum * fixnum) *))
+	    *))
+(maybe-string->sre (procedure maybe-string->sre (*) *))
+(sre->irregex (procedure sre->irregex (#!rest) *))
+(string->irregex (procedure string->irregex (string #!rest) *))
+(string->sre (procedure string->sre (string #!rest) *))
 
 ;; lolevel
 
@@ -894,22 +923,6 @@
 (with-input-from-pipe (procedure with-input-from-pipe (string (procedure () . *) #!optional symbol) . *))
 (with-output-to-pipe (procedure with-output-to-pipe (string (procedure () . *) #!optional symbol) . *))
 
-;; regex
-
-(glob->regexp (procedure glob->regexp (string #!optional *) *))
-(glob? deprecated)
-(grep (procedure grep (* list #!optional (procedure (*) *)) list))
-(regexp (procedure regexp (* #!optional * * *) (struct regexp)))
-(regexp-escape (procedure regexp-escape (string) string))
-(regexp? (procedure regexp? (*) boolean))
-(string-match (procedure string-match (* string) *))
-(string-match-positions (procedure string-match-positions (* string) *))
-(string-search (procedure string-search (* string #!optional fixnum fixnum) *))
-(string-search-positions (procedure string-search-positions (* string #!optional fixnum fixnum) *))
-(string-split-fields (procedure string-split-fields (* string #!optional * fixnum) list))
-(string-substitute (procedure string-substitute (* string string #!optional *) string))
-(string-substitute* (procedure string-substitute* (string list #!optional *) string))
-
 ;; srfi-1
 
 (alist-cons (procedure alist-cons (* * *) list))
diff --git a/utils.scm b/utils.scm
index 03bd4ed6..b934a550 100644
--- a/utils.scm
+++ b/utils.scm
@@ -27,7 +27,7 @@
 
 (declare
   (unit utils)
-  (uses extras srfi-13 posix files regex)
+  (uses extras srfi-13 posix files irregex)
   (fixnum)
   (hide chop-pds)
   (disable-interrupts) )
@@ -115,18 +115,15 @@
 ;;; Scan lines until regex or predicate matches
 
 (define scan-input-lines
-  (let ((regexp regexp)
-	(read-line read-line)
-	(string-search string-search))
-    (lambda (rx #!optional (port ##sys#standard-input))
-      (let ((rx (if (procedure? rx)
-		    rx
-		    (cut string-search (regexp rx) <>))))
-	(let loop ()
-	  (let ((ln (read-line port)))
-	    (and (not (eof-object? ln))
-		 (or (rx ln)
-		     (loop)))))))))
+  (lambda (rx #!optional (port ##sys#standard-input))
+    (let ((rx (if (procedure? rx)
+		  rx
+		  (cute irregex-search (irregex rx) <>))))
+      (let loop ()
+	(let ((ln (read-line port)))
+	  (and (not (eof-object? ln))
+	       (or (rx ln)
+		   (loop))))))))
 
 
 ;; Ask for confirmation
Trap