~ chicken-core (chicken-5) 0b0eb8d0e7188a0413d7b1c2aac4d9b41e607b64
commit 0b0eb8d0e7188a0413d7b1c2aac4d9b41e607b64
Author: Peter Bex <peter.bex@xs4all.nl>
AuthorDate: Sun Sep 23 15:35:51 2012 +0200
Commit: felix <felix@call-with-current-continuation.org>
CommitDate: Sun Sep 23 23:29:55 2012 +0200
Irregex: Use proper abstractions for manipulating the nfa-multi-state representation, to make the code more readable and maintainable. (upstream changeset 65b8e4a1529c)
diff --git a/irregex-core.scm b/irregex-core.scm
index 18bb50a6..edfbf01d 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -2392,23 +2392,19 @@
(nfa-set-epsilons! nfa i (cons (cons x t) eps)))))
(define (nfa-get-reorder-commands nfa mst)
- (cond ((assoc mst
- (vector-ref nfa (+ (* (nfa-multi-state-hash nfa mst)
- *nfa-num-fields*) 2)))
+ (cond ((assoc mst (vector-ref nfa (+ (* (mst-hash mst) *nfa-num-fields*) 2)))
=> cdr)
(else #f)))
(define (nfa-set-reorder-commands! nfa mst x)
- (let ((i (+ (* (nfa-multi-state-hash nfa mst) *nfa-num-fields*) 2)))
+ (let ((i (+ (* (mst-hash mst) *nfa-num-fields*) 2)))
(vector-set! nfa i (cons (cons mst x) (vector-ref nfa i)))))
(define (nfa-get-closure nfa mst)
- (cond ((assoc mst
- (vector-ref nfa (+ (* (nfa-multi-state-hash nfa mst)
- *nfa-num-fields*) 3)))
+ (cond ((assoc mst (vector-ref nfa (+ (* (mst-hash mst) *nfa-num-fields*) 3)))
=> cdr)
(else #f)))
(define (nfa-add-closure! nfa mst x)
- (let ((i (+ (* (nfa-multi-state-hash nfa mst) *nfa-num-fields*) 3)))
+ (let ((i (+ (* (mst-hash mst) *nfa-num-fields*) 3)))
(vector-set! nfa i (cons (cons mst x) (vector-ref nfa i)))))
;; Compile and return the vector of NFA states (in groups of
@@ -2668,18 +2664,32 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; NFA multi-state representation
-(define (nfa-multi-state-hash nfa mst)
+(define *mst-first-state-index* 3)
+
+(define (mst-mappings-summary mst)
+ (vector-ref mst 0))
+
+(define (mst-num-states mst)
+ (vector-ref mst 1))
+
+(define (mst-num-states-set! mst num)
+ (vector-set! mst 1 num))
+
+(define (mst-hash mst)
;; We could do (modulo X (nfa-num-states nfa)) here which would be faster,
;; but we can't assume a full numerical tower (and updating *could*
;; produce a bignum), so we do it each time when updating the hash.
(vector-ref mst 2))
+(define (mst-hash-set! mst hash)
+ (vector-set! mst 2 hash))
+
;; Returns #f if NFA state does not occur in multi-state
-(define (nfa-state-mappings mst state)
- (vector-ref mst (+ state 3)))
+(define (mst-state-mappings mst state)
+ (vector-ref mst (+ state *mst-first-state-index*)))
-(define (nfa-multi-state-mappings-summary mst)
- (vector-ref mst 0))
+(define (mst-state-mappings-set! mst state mappings)
+ (vector-set! mst (+ state *mst-first-state-index*) mappings))
;; A multi-state holds a set of states with their tag-to-slot mappings.
;; Slot 0 contains a summary of all mappings for all states in the multi-state.
@@ -2689,35 +2699,35 @@
;; state numbers plus each tag value (once per occurrence). This is a silly
;; hashing calculation, but it seems to produce a well-spread out hash table and
;; it has the added advantage that we can use the value as a quick check if the
-;; state is definitely NOT equivalent to another in nfa-multi-state-same-states?
+;; state is definitely NOT equivalent to another in mst-same-states?
;; The other slots contain mappings for each corresponding state.
-(define (make-nfa-multi-state nfa)
- (let ((mst (make-vector (+ (nfa-num-states nfa) 3) #f)))
+(define (make-mst nfa)
+ (let ((mst (make-vector (+ (nfa-num-states nfa) *mst-first-state-index*) #f)))
(vector-set! mst 0 (make-vector (nfa-num-tags nfa) '())) ; tag summary
(vector-set! mst 1 0) ; total number of states
(vector-set! mst 2 0) ; states and tags hash
mst))
;; NOTE: This doesn't do a deep copy of the mappings. Don't mutate them!
-(define (nfa-multi-state-copy mst)
+(define (mst-copy mst)
(let ((v (vector-copy mst)))
(vector-set! v 0 (vector-copy (vector-ref mst 0)))
v))
-(define (nfa-state->multi-state nfa state mappings)
- (let ((mst (make-nfa-multi-state nfa)))
- (nfa-multi-state-add! nfa mst state mappings)
+(define (nfa-state->mst nfa state mappings)
+ (let ((mst (make-mst nfa)))
+ (mst-add! nfa mst state mappings)
mst))
;; Extend multi-state with a state and add its tag->slot mappings.
-(define (nfa-multi-state-add! nfa mst state mappings)
- (let ((hash-value (vector-ref mst 2)))
- (cond ((not (vector-ref mst (+ state 3))) ; Update state hash & count?
- (set! hash-value (+ (vector-ref mst 2) state))
- (vector-set! mst 1 (+ (vector-ref mst 1) 1))))
- (vector-set! mst (+ state 3) mappings)
- (let ((all-mappings (vector-ref mst 0)))
+(define (mst-add! nfa mst state mappings)
+ (let ((hash-value (mst-hash mst)))
+ (cond ((not (mst-state-mappings mst state)) ; Update state hash & count?
+ (set! hash-value (+ hash-value state))
+ (mst-num-states-set! mst (+ (mst-num-states mst) 1))))
+ (mst-state-mappings-set! mst state mappings)
+ (let ((all-mappings (mst-mappings-summary mst)))
(for-each
(lambda (tag&slot)
(let* ((t (car tag&slot))
@@ -2727,22 +2737,23 @@
(set! hash-value (+ hash-value t))
(vector-set! all-mappings t (cons s m))))))
mappings))
- (vector-set! mst 2 (modulo hash-value (nfa-num-states nfa)))))
+ (mst-hash-set! mst (modulo hash-value (nfa-num-states nfa)))))
;; Same as above, but skip updating mappings summary.
;; Called when we know all the tag->slot mappings are already in the summary.
-(define (nfa-multi-state-add/fast! nfa mst state mappings)
- (cond ((not (vector-ref mst (+ state 3))) ; Update state hash & count?
- (vector-set! mst 2 (modulo (+ (vector-ref mst 2) state)
- (nfa-num-states nfa)))
- (vector-set! mst 1 (+ (vector-ref mst 1) 1))))
- (vector-set! mst (+ state 3) mappings))
+(define (mst-add/fast! nfa mst state mappings)
+ (cond ((not (mst-state-mappings mst state)) ; Update state hash & count?
+ (mst-hash-set!
+ mst (modulo (+ (mst-hash mst) state)
+ (nfa-num-states nfa)))
+ (mst-num-states-set! mst (+ (mst-num-states mst) 1))))
+ (mst-state-mappings-set! mst state mappings))
;; Same as above, assigning a new slot for a tag. This slot is then
;; added to the summary, if it isn't in there yet. This is more efficient
;; than looping through all the mappings.
-(define (nfa-multi-state-add-tagged! nfa mst state mappings tag slot)
- (let* ((mappings-summary (vector-ref mst 0))
+(define (mst-add-tagged! nfa mst state mappings tag slot)
+ (let* ((mappings-summary (mst-mappings-summary mst))
(summary-tag-slots (vector-ref mappings-summary tag))
(new-mappings (let lp ((m mappings)
(res '()))
@@ -2750,43 +2761,43 @@
((= (caar m) tag)
(append res (cons (cons tag slot) (cdr m))))
(else (lp (cdr m) (cons (car m) res))))))
- (hash-value (vector-ref mst 2)))
- (cond ((not (vector-ref mst (+ state 3))) ; Update state hash & count?
+ (hash-value (mst-hash mst)))
+ (cond ((not (mst-state-mappings mst state)) ; Update state hash & count?
(set! hash-value (+ hash-value state))
- (vector-set! mst 1 (+ (vector-ref mst 1) 1))))
- (vector-set! mst (+ state 3) new-mappings)
+ (mst-num-states-set! mst (+ (mst-num-states mst) 1))))
+ (mst-state-mappings-set! mst state new-mappings)
(cond ((not (memv slot summary-tag-slots)) ; Update tag/slot summary
(set! hash-value (+ hash-value tag))
(vector-set! mappings-summary tag (cons slot summary-tag-slots))))
- (vector-set! mst 2 (modulo hash-value (nfa-num-states nfa)))
+ (mst-hash-set! mst (modulo hash-value (nfa-num-states nfa)))
new-mappings))
-(define (nfa-multi-state-same-states? a b)
+(define (mst-same-states? a b)
;; First check if hash and state counts match, then check each state
- (and (= (vector-ref a 2) (vector-ref b 2))
- (= (vector-ref a 1) (vector-ref b 1))
+ (and (= (mst-hash a) (mst-hash b))
+ (= (mst-num-states a) (mst-num-states b))
(let ((len (vector-length a)))
- (let lp ((i 3))
+ (let lp ((i *mst-first-state-index*))
(or (= i len)
(and (equal? (not (vector-ref a i))
(not (vector-ref b i)))
(lp (+ i 1))))))))
-(define (nfa-multi-state-fold mst kons knil)
+(define (mst-fold mst kons knil)
(let ((limit (vector-length mst)))
- (let lp ((i 3)
+ (let lp ((i *mst-first-state-index*)
(acc knil))
(if (= i limit)
acc
(let ((m (vector-ref mst i)))
- (lp (+ i 1) (if m (kons (- i 3) m acc) acc)))))))
+ (lp (+ i 1) (if m (kons (- i *mst-first-state-index*) m acc) acc)))))))
;; Find the lowest fresh index for this tag that's unused
;; in the multi-state. This also updates the nfa's highest
;; tag counter if a completely new slot number was assigned.
(define (next-index-for-tag! nfa tag mst)
(let* ((highest (nfa-highest-map-index nfa))
- (tag-slots (vector-ref (vector-ref mst 0) tag))
+ (tag-slots (vector-ref (mst-mappings-summary mst) tag))
(new-index (do ((slot 0 (+ slot 1)))
((not (memv slot tag-slots)) slot))))
(cond ((> new-index highest)
@@ -2828,12 +2839,12 @@
(define (nfa->dfa nfa . o)
(let* ((max-states (and (pair? o) (car o)))
- (start (nfa-state->multi-state nfa (nfa-start-state nfa) '()))
+ (start (nfa-state->mst nfa (nfa-start-state nfa) '()))
(start-closure (nfa-epsilon-closure nfa start))
;; Set up a special "initializer" state from which we reach the
;; start-closure to ensure that leading tags are set properly.
(init-set (tag-set-commands-for-closure nfa start start-closure '()))
- (dummy (make-nfa-multi-state nfa))
+ (dummy (make-mst nfa))
(init-state (list dummy #f `((,start-closure #f () . ,init-set)))))
;; Unmarked states are just sets of NFA states with tag-maps, marked states
;; are sets of NFA states with transitions to sets of NFA states
@@ -2855,7 +2866,7 @@
(unmarked-states (cdr unmarked-states))
(dfa-trans '()))
(if (null? trans)
- (let ((finalizer (nfa-state-mappings dfa-state 0)))
+ (let ((finalizer (mst-state-mappings dfa-state 0)))
(lp unmarked-states
(cons (list dfa-state finalizer dfa-trans) marked-states)
(+ dfa-size 1)))
@@ -2903,7 +2914,7 @@
(define (csets-intersect? a b)
(let ((i (cset-intersection a b)))
(and (not (cset-empty? i)) i)))
- (nfa-multi-state-fold
+ (mst-fold
annotated-states
(lambda (st mappings res)
(let ((trans (nfa-get-state-trans nfa st))) ; Always one state per trans
@@ -2913,13 +2924,14 @@
(cond
;; State not seen yet? Add a new state transition
((null? ls)
- ;; TODO: We should try to find an existing DFA state with only
- ;; this NFA state in it, and extend the cset with the current one.
- ;; This produces smaller DFAs, but takes longer to compile.
- (cons (cons cs (nfa-state->multi-state nfa state mappings))
+ ;; TODO: We should try to find an existing DFA state
+ ;; with only this NFA state in it, and extend the cset
+ ;; with the current one. This produces smaller DFAs,
+ ;; but takes longer to compile.
+ (cons (cons cs (nfa-state->mst nfa state mappings))
res))
((cset=? cs (caar ls)) ; Add state to existing set for this charset
- (nfa-multi-state-add! nfa (cdar ls) state mappings)
+ (mst-add! nfa (cdar ls) state mappings)
(append ls res))
((csets-intersect? cs (caar ls)) =>
(lambda (intersection)
@@ -2927,14 +2939,15 @@
(only-in-old (cset-difference (caar ls) cs))
(states-in-both (cdar ls))
(states-for-old (and (not (cset-empty? only-in-old))
- (nfa-multi-state-copy states-in-both)))
+ (mst-copy states-in-both)))
(res (if states-for-old
(cons (cons only-in-old states-for-old) res)
res)))
- (nfa-multi-state-add! nfa states-in-both state mappings)
- ;; Add this state to the states already here and restrict to
- ;; the overlapping charset and continue with the remaining subset
- ;; of the new cset (if nonempty)
+ (mst-add! nfa states-in-both state mappings)
+ ;; Add this state to the states already here and
+ ;; restrict to the overlapping charset and continue
+ ;; with the remaining subset of the new cset (if
+ ;; nonempty)
(if (cset-empty? only-in-new)
(cons (cons intersection states-in-both)
(append (cdr ls) res))
@@ -2948,12 +2961,12 @@
;; through epsilon transitions, with the tags encountered on the way.
(define (nfa-epsilon-closure-internal nfa annotated-states)
;; The stack _MUST_ be in this order for some reason I don't fully understand
- (let lp ((stack (nfa-multi-state-fold annotated-states
+ (let lp ((stack (mst-fold annotated-states
(lambda (st m res)
(cons (cons st m) res))
'()))
(priorities (make-vector (nfa-num-states nfa) 0))
- (closure (nfa-multi-state-copy annotated-states)))
+ (closure (mst-copy annotated-states)))
(if (null? stack)
closure
(let ((prio/orig-state (caar stack)) ; priority is just the state nr.
@@ -2972,11 +2985,11 @@
((cdar trans) => ; tagged transition?
(lambda (tag)
(let* ((index (next-index-for-tag! nfa tag closure))
- (new-mappings (nfa-multi-state-add-tagged!
+ (new-mappings (mst-add-tagged!
nfa closure state mappings tag index)))
(lp2 (cdr trans) (cons (cons state new-mappings) stack)))))
(else
- (nfa-multi-state-add/fast! nfa closure state mappings)
+ (mst-add/fast! nfa closure state mappings)
(lp2 (cdr trans) (cons (cons state mappings) stack)))))
(else (lp2 (cdr trans) stack))))))))))
@@ -2991,8 +3004,8 @@
;; not present in the original state.
(define (tag-set-commands-for-closure nfa orig-state closure copy-cmds)
(let ((num-tags (nfa-num-tags nfa))
- (closure-summary (nfa-multi-state-mappings-summary closure))
- (state-summary (nfa-multi-state-mappings-summary orig-state)))
+ (closure-summary (mst-mappings-summary closure))
+ (state-summary (mst-mappings-summary orig-state)))
(let lp ((t 0) (cmds '()))
(if (= t num-tags)
cmds
@@ -3030,14 +3043,13 @@
(define (find-reorder-commands-internal nfa closure dfa-states)
(let ((num-states (nfa-num-states nfa))
(num-tags (nfa-num-tags nfa))
- (closure-summary (nfa-multi-state-mappings-summary closure)))
+ (closure-summary (mst-mappings-summary closure)))
(let lp ((dfa-states dfa-states))
(if (null? dfa-states)
#f
- (if (not (nfa-multi-state-same-states? (caar dfa-states) closure))
+ (if (not (mst-same-states? (caar dfa-states) closure))
(lp (cdr dfa-states))
- (let lp2 ((state-summary (nfa-multi-state-mappings-summary
- (caar dfa-states)))
+ (let lp2 ((state-summary (mst-mappings-summary (caar dfa-states)))
(t 0) (cmds '()))
(if (= t num-tags)
(cons (caar dfa-states) cmds)
Trap