~ chicken-core (master) /data-structures.scm


  1;;; data-structures.scm - Optional data structures extensions
  2;
  3; Copyright (c) 2008-2022, The CHICKEN Team
  4; All rights reserved.
  5;
  6; Redistribution and use in source and binary forms, with or without
  7; modification, are permitted provided that the following conditions
  8; are met:
  9;
 10;   Redistributions of source code must retain the above copyright notice, this list of conditions and the following
 11;     disclaimer.
 12;   Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
 13;     disclaimer in the documentation and/or other materials provided with the distribution.
 14;   Neither the name of the author nor the names of its contributors may be used to endorse or promote
 15;     products derived from this software without specific prior written permission.
 16;
 17; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 18; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 19; AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
 20; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 21; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 22; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 23; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 24; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 25; POSSIBILITY OF SUCH DAMAGE.
 26
 27
 28(declare
 29 (unit data-structures))
 30
 31(module chicken.string
 32  (conc ->string string-chop string-chomp
 33   string-compare3 string-compare3-ci
 34   reverse-list->string reverse-string-append
 35   string-intersperse string-split
 36   string-translate string-translate*
 37   substring=? substring-ci=?
 38   substring-index substring-index-ci)
 39
 40(import scheme)
 41(import chicken.base)
 42(import chicken.condition)
 43(import chicken.fixnum)
 44(import chicken.foreign)
 45(import (only (scheme base) open-output-string get-output-string))
 46
 47; (reverse-string-append l) = (apply string-append (reverse l))
 48
 49(define (reverse-string-append l)
 50  (define (rev-string-append l i)
 51    (if (pair? l)
 52      (let* ((str (car l))
 53	     (len (string-length str))
 54	     (result (rev-string-append (cdr l) (fx+ i len))))
 55	(let loop ((j 0) (k (fx- (fx- (string-length result) i) len)))
 56	  (if (fx< j len)
 57	    (begin
 58	      (string-set! result k (string-ref str j))
 59	      (loop (fx+ j 1) (fx+ k 1)))
 60	    result)))
 61      (make-string i)))
 62  (rev-string-append l 0))
 63
 64(define (reverse-list->string l)
 65  (##sys#reverse-list->string l))
 66
 67;;; Anything->string conversion:
 68
 69(define ->string
 70  (lambda (x)
 71    (cond [(string? x) x]
 72	  [(symbol? x) (symbol->string x)]
 73	  [(char? x) (string x)]
 74	  [(number? x) (##sys#number->string x)]
 75	  [else
 76	   (let ([o (open-output-string)])
 77	     (display x o)
 78	     (get-output-string o) ) ] ) ) )
 79
 80(define conc
 81  (lambda args
 82    (apply string-append (map ->string args)) ) )
 83
 84
 85;;; Search one string inside another:
 86
 87(let ()
 88  (define (traverse which where start test loc)
 89    (##sys#check-string which loc)
 90    (##sys#check-string where loc)
 91    (let* ((wherelen (string-length where))
 92           (whichlen (string-length which))
 93           (end (fx- wherelen whichlen)))
 94      (##sys#check-fixnum start loc)
 95      (if (and (fx>= start 0)
 96               (fx>= wherelen start))
 97          (if (fx= whichlen 0)
 98              start
 99              (and (fx>= end 0)
100                   (let loop ((istart start))
101                     (cond ((fx> istart end) #f)
102                           ((test istart whichlen) istart)
103                           (else (loop (fx+ istart 1)))))))
104          (##sys#error-hook (foreign-value "C_OUT_OF_BOUNDS_ERROR" int)
105                            loc
106                            where
107                            start))))
108
109  (set! ##sys#substring-index
110    (lambda (which where start)
111      (traverse
112       which where start
113       (lambda (i l)
114         (##core#inline "C_u_i_substring_equal_p" which where 0 i l))
115       'substring-index) ) )
116  (set! ##sys#substring-index-ci
117    (lambda (which where start)
118      (traverse
119       which where start
120       (lambda (i l) (##core#inline "C_u_i_substring_ci_equal_p" which where 0 i l))
121       'substring-index-ci) ) ) )
122
123(define (substring-index which where #!optional (start 0))
124  (##sys#substring-index which where start) )
125
126(define (substring-index-ci which where #!optional (start 0))
127  (##sys#substring-index-ci which where start) )
128
129
130;;; 3-Way string comparison:
131
132(define (string-compare3 s1 s2)
133  (##sys#check-string s1 'string-compare3)
134  (##sys#check-string s2 'string-compare3)
135  (let ((len1 (string-length s1))
136	(len2 (string-length s2)) )
137    (let* ((len-diff (fx- len1 len2))
138	   (cmp (##core#inline "C_utf_compare" s1 s2 0 0
139                        (if (fx< len-diff 0) len1 len2))))
140      (if (fx= cmp 0)
141	  len-diff
142	  cmp))))
143
144(define (string-compare3-ci s1 s2)
145  (##sys#check-string s1 'string-compare3-ci)
146  (##sys#check-string s2 'string-compare3-ci)
147  (let ((len1 (string-length s1))
148	(len2 (string-length s2)) )
149    (let* ((len-diff (fx- len1 len2))
150	   (cmp (##core#inline "C_utf_compare_ci"
151                        s1 s2 0 0
152                        (if (fx< len-diff 0) len1 len2))))
153      (if (fx= cmp 0)
154	  len-diff
155	  cmp))))
156
157
158;;; Substring comparison:
159
160(define (##sys#substring=? s1 s2 start1 start2 n)
161  (##sys#check-string s1 'substring=?)
162  (##sys#check-string s2 'substring=?)
163  (##sys#check-fixnum start1 'substring=?)
164  (##sys#check-fixnum start2 'substring=?)
165  (let* ((l1 (string-length s1))
166         (l2 (string-length s2))
167         (maxlen (fxmin (fx- l1 start1)
168	                (fx- l2 start2) ) )
169         (len (if n
170                  (begin (##sys#check-range n 0 (fx+ maxlen 1) 'substring=?) n)
171                  maxlen)))
172    (##sys#check-range start1 0 (fx+ l1 1) 'substring=?)
173    (##sys#check-range start2 0 (fx+ l2 1) 'substring=?)
174    (##core#inline "C_u_i_substring_equal_p" s1 s2 start1 start2 len) ) )
175
176(define (substring=? s1 s2 #!optional (start1 0) (start2 0) len)
177  (##sys#substring=? s1 s2 start1 start2 len) )
178
179(define (##sys#substring-ci=? s1 s2 start1 start2 n)
180  (##sys#check-string s1 'substring-ci=?)
181  (##sys#check-string s2 'substring-ci=?)
182  (##sys#check-fixnum start1 'substring-ci=?)
183  (##sys#check-fixnum start2 'substring-ci=?)
184  (let* ((l1 (string-length s1))
185         (l2 (string-length s2))
186         (maxlen (fxmin (fx- l1 start1)
187		        (fx- l2 start2) ) )
188         (len (if n
189                  (begin (##sys#check-range n 0 (fx+ maxlen 1) 'substring-ci=?) n)
190                  maxlen)))
191    (##sys#check-range start1 0 (fx+ l1 1) 'substring=?)
192    (##sys#check-range start2 0 (fx+ l2 1) 'substring=?)
193    (##core#inline "C_u_i_substring_ci_equal_p" s1 s2 start1 start2 len) ) )
194
195(define (substring-ci=? s1 s2 #!optional (start1 0) (start2 0) len)
196  (##sys#substring-ci=? s1 s2 start1 start2 len) )
197
198
199;;; Split string into substrings:
200
201(define string-split
202  (lambda (str . delstr-and-flag)
203    (##sys#check-string str 'string-split)
204    (let* ([del (if (null? delstr-and-flag) "\t\n " (car delstr-and-flag))]
205	   [flag (if (fx= (length delstr-and-flag) 2) (cadr delstr-and-flag) #f)]
206	   [strlen (string-length str)] )
207      (##sys#check-string del 'string-split)
208      (let ([dellen (string-length del)]
209	    [first #f] )
210	(define (add from to last)
211	  (let ([node (cons (##sys#substring str from to) '())])
212	    (if first
213		(##sys#setslot last 1 node)
214		(set! first node) )
215	    node) )
216	(let loop ([i 0] [last #f] [from 0])
217	  (cond [(fx>= i strlen)
218		 (when (or (fx> i from) flag) (add from i last))
219		 (or first '()) ]
220		[else
221		 (let ([c (string-ref str i)])
222		   (let scan ([j 0])
223		     (cond [(fx>= j dellen) (loop (fx+ i 1) last from)]
224			   [(eq? c (string-ref del j))
225			    (let ([i2 (fx+ i 1)])
226			      (if (or (fx> i from) flag)
227				  (loop i2 (add from i last) i2)
228				  (loop i2 last i2) ) ) ]
229			   [else (scan (fx+ j 1))] ) ) ) ] ) ) ) ) ) )
230
231
232;;; Concatenate list of strings:
233
234(define (string-intersperse strs #!optional (ds " "))
235  (##sys#check-list strs 'string-intersperse)
236  (##sys#check-string ds 'string-intersperse)
237  (let* ((dsbv (##sys#slot ds 0))
238         (dslen (fx- (##sys#size dsbv) 1)))
239    (let loop1 ((ss strs) (n 0))
240      (cond ((##core#inline "C_eqp" ss '())
241	     (if (##core#inline "C_eqp" strs '())
242		 ""
243		 (let* ((bytes (fx- n dslen))
244                        (bv (##sys#allocate-bytevector (fx+ bytes 1) 0)))
245		   (let loop2 ((ss2 strs) (n2 0))
246		     (let* ((stri (##sys#slot ss2 0))
247			    (next (##sys#slot ss2 1))
248                            (bvi (##sys#slot stri 0))
249			    (count (fx- (##sys#size bvi) 1)))
250		       (##core#inline "C_copy_memory_with_offset" bv bvi n2 0 count)
251		       (let ((n3 (fx+ n2 count)))
252			 (if (##core#inline "C_eqp" next '())
253                             (##core#inline_allocate ("C_a_ustring" 5) bv
254                                                     (##core#inline "C_utf_range_length"
255                                                                    bv 0 n3))
256
257                     			     (begin
258			       (##core#inline "C_copy_memory_with_offset"
259                                              bv dsbv n3 0 dslen)
260			       (loop2 next (fx+ n3 dslen)) ) ) ) ) ) ) ) )
261	    ((and (##core#inline "C_blockp" ss) (##core#inline "C_pairp" ss))
262	     (let ((stri (##sys#slot ss 0)))
263	       (##sys#check-string stri 'string-intersperse)
264	       (loop1 (##sys#slot ss 1)
265		      (fx+ (fx- (##sys#size (##sys#slot stri 0)) 1)
266                           (fx+ dslen n)) ) ) )
267	    (else (##sys#error-not-a-proper-list strs)) ) ) ) )
268
269
270;;; Translate elements of a string:
271
272(define string-translate
273  (lambda (str from . to)
274    (define (instring s)
275      (let ([len (string-length s)])
276	(lambda (c)
277	  (let loop ([i 0])
278	    (cond [(fx>= i len) #f]
279		  [(eq? c (string-ref s i)) i]
280		  [else (loop (fx+ i 1))] ) ) ) ) )
281    (let* ([from
282	    (cond [(char? from) (lambda (c) (eq? c from))]
283		  [(pair? from) (instring (list->string from))]
284		  [else
285		   (##sys#check-string from 'string-translate)
286		   (instring from) ] ) ]
287	   [to
288	    (and (pair? to)
289		 (let ([tx (##sys#slot to 0)])
290		   (cond [(char? tx) tx]
291			 [(pair? tx) (list->string tx)]
292			 [else
293			  (##sys#check-string tx 'string-translate)
294			  tx] ) ) ) ]
295	   [tlen (and (string? to) (string-length to))] )
296      (##sys#check-string str 'string-translate)
297      (let* ([slen (string-length str)]
298	     [str2 (make-string slen)] )
299	(let loop ([i 0] [j 0])
300	  (if (fx>= i slen)
301	      (if (fx< j i)
302		  (##sys#substring str2 0 j)
303		  str2)
304	      (let* ([ci (string-ref str i)]
305		     [found (from ci)] )
306		(cond [(not found)
307		       (string-set! str2 j ci)
308		       (loop (fx+ i 1) (fx+ j 1)) ]
309		      [(not to) (loop (fx+ i 1) j)]
310		      [(char? to)
311		       (string-set! str2 j to)
312		       (loop (fx+ i 1) (fx+ j 1)) ]
313		      [(fx>= found tlen)
314		       (##sys#error 'string-translate "invalid translation destination" i to) ]
315		      [else
316		       (string-set! str2 j (string-ref to found))
317		       (loop (fx+ i 1) (fx+ j 1)) ] ) ) ) ) ) ) ) )
318
319(define (fragments->string total fs)
320  (let ((dest (##sys#make-bytevector (fx+ total 1))))
321    (let loop ((fs fs) (pos 0))
322      (if (null? fs)
323	  (##core#inline_allocate ("C_a_ustring" 5) dest
324                           (##core#inline "C_utf_length" dest))
325	  (let* ((f (##sys#slot fs 0))
326		 (flen (fx- (##sys#size f) 1)))
327	    (##core#inline "C_copy_memory_with_offset" dest f pos 0 flen)
328	    (loop (##sys#slot fs 1) (fx+ pos flen)) ) ) ) ) )
329
330(define (string-translate* str smap)
331  (##sys#check-string str 'string-translate*)
332  (##sys#check-list smap 'string-translate*)
333  (let ((len (string-length str)))
334    (define (collect i from total fs)
335      (if (fx>= i len)
336	  (begin
337            (when (fx> i from)
338              (let ((bv (##sys#slot (##sys#substring str from i) 0)))
339                (set! fs (cons bv fs))
340                (set! total (fx+ total (fx- (##sys#size bv) 1)))))
341  	    (fragments->string total (##sys#fast-reverse fs)))
342	  (let loop ((smap smap))
343	    (if (null? smap)
344		(collect (fx+ i 1) from total fs)
345		(let* ((p (car smap))
346		       (sm (car p))
347		       (smlen (string-length sm))
348		       (st (cdr p)) )
349		  (if (and (fx<= (fx+ i smlen) len)
350			   (##core#inline "C_u_i_substring_equal_p" str sm i 0 smlen))
351		      (let ((i2 (fx+ i smlen))
352                            (stbv (##sys#slot st 0)))
353			(when (fx> i from)
354                          (let ((bv (##sys#slot (##sys#substring str from i) 0)))
355                            (set! fs (cons bv fs))
356                            (set! total (fx+ total (fx- (##sys#size bv) 1)))))
357			(collect
358			 i2 i2
359			 (fx+ total (fx- (##sys#size stbv) 1))
360			 (cons stbv fs) ) )
361		      (loop (cdr smap)) ) ) ) ) ) )
362    (collect 0 0 0 '()) ) )
363
364
365;;; Chop string into substrings:
366
367(define (string-chop str len)
368  (##sys#check-string str 'string-chop)
369  (##sys#check-fixnum len 'string-chop)
370  (let ([total (string-length str)])
371    (let loop ([total total] [pos 0])
372      (cond [(fx<= total 0) '()]
373	    [(fx<= total len) (list (##sys#substring str pos (fx+ pos total)))]
374	    [else (cons (##sys#substring str pos (fx+ pos len)) (loop (fx- total len) (fx+ pos len)))] ) ) ) )
375
376
377;;; Remove suffix
378
379(define (string-chomp str #!optional (suffix "\n"))
380  (##sys#check-string str 'string-chomp)
381  (##sys#check-string suffix 'string-chomp)
382  (let* ((len (string-length str))
383	 (slen (string-length suffix))
384	 (diff (fx- len slen)) )
385    (if (and (fx>= len slen)
386	     (##core#inline "C_u_i_substring_equal_p" str suffix diff 0 slen) )
387	(##sys#substring str 0 diff)
388	str) ) )
389
390) ; chicken.string
391
392
393(module chicken.sort
394    (merge merge! sort sort! sorted? topological-sort)
395
396(import scheme chicken.base chicken.condition chicken.fixnum)
397
398;;; Defines: sorted?, merge, merge!, sort, sort!
399;;; Author : Richard A. O'Keefe (based on Prolog code by D.H.D.Warren)
400;;;
401;;; This code is in the public domain.
402
403;;; Updated: 11 June 1991
404;;; Modified for scheme library: Aubrey Jaffer 19 Sept. 1991
405;;; Updated: 19 June 1995
406
407;;; (sorted? sequence less?)
408;;; is true when sequence is a list (x0 x1 ... xm) or a vector #(x0 ... xm)
409;;; such that for all 1 <= i <= m,
410;;;	(not (less? (list-ref list i) (list-ref list (- i 1)))).
411
412; Modified by flw for use with CHICKEN:
413;
414
415
416(define (sorted? seq less?)
417    (cond
418	((null? seq)
419	    #t)
420	((vector? seq)
421	    (let ((n (vector-length seq)))
422		(if (<= n 1)
423		    #t
424		    (do ((i 1 (+ i 1)))
425			((or (= i n)
426			     (less? (vector-ref seq i)
427				    (vector-ref seq (- i 1))))
428			    (= i n)) )) ))
429	(else
430	    (let loop ((last (car seq)) (next (cdr seq)))
431		(or (null? next)
432		    (and (not (less? (car next) last))
433			 (loop (car next) (cdr next)) )) )) ))
434
435
436;;; (merge a b less?)
437;;; takes two lists a and b such that (sorted? a less?) and (sorted? b less?)
438;;; and returns a new list in which the elements of a and b have been stably
439;;; interleaved so that (sorted? (merge a b less?) less?).
440;;; Note:  this does _not_ accept vectors.  See below.
441
442(define (merge a b less?)
443    (cond
444	((null? a) b)
445	((null? b) a)
446	(else (let loop ((x (car a)) (a (cdr a)) (y (car b)) (b (cdr b)))
447	    ;; The loop handles the merging of non-empty lists.	 It has
448	    ;; been written this way to save testing and car/cdring.
449	    (if (less? y x)
450		(if (null? b)
451		    (cons y (cons x a))
452		    (cons y (loop x a (car b) (cdr b)) ))
453		;; x <= y
454		(if (null? a)
455		    (cons x (cons y b))
456		    (cons x (loop (car a) (cdr a) y b)) )) )) ))
457
458
459;;; (merge! a b less?)
460;;; takes two sorted lists a and b and smashes their cdr fields to form a
461;;; single sorted list including the elements of both.
462;;; Note:  this does _not_ accept vectors.
463
464(define (merge! a b less?)
465    (define (loop r a b)
466	(if (less? (car b) (car a))
467	    (begin
468		(set-cdr! r b)
469		(if (null? (cdr b))
470		    (set-cdr! b a)
471		    (loop b a (cdr b)) ))
472	    ;; (car a) <= (car b)
473	    (begin
474		(set-cdr! r a)
475		(if (null? (cdr a))
476		    (set-cdr! a b)
477		    (loop a (cdr a) b)) )) )
478    (cond
479	((null? a) b)
480	((null? b) a)
481	((less? (car b) (car a))
482	    (if (null? (cdr b))
483		(set-cdr! b a)
484		(loop b a (cdr b)))
485	    b)
486	(else ; (car a) <= (car b)
487	    (if (null? (cdr a))
488		(set-cdr! a b)
489		(loop a (cdr a) b))
490	    a)))
491
492
493;;; (sort! sequence less?)
494;;; sorts the list or vector sequence destructively.  It uses a version
495;;; of merge-sort invented, to the best of my knowledge, by David H. D.
496;;; Warren, and first used in the DEC-10 Prolog system.	 R. A. O'Keefe
497;;; adapted it to work destructively in Scheme.
498
499(define (sort! seq less?)
500    (define (step n)
501	(cond
502	    ((> n 2)
503		(let* ((j (quotient n 2))
504		       (a (step j))
505		       (k (- n j))
506		       (b (step k)))
507		    (merge! a b less?)))
508	    ((= n 2)
509		(let ((x (car seq))
510		      (y (cadr seq))
511		      (p seq))
512		    (set! seq (cddr seq))
513		    (if (less? y x) (begin
514			(set-car! p y)
515			(set-car! (cdr p) x)))
516		    (set-cdr! (cdr p) '())
517		    p))
518	    ((= n 1)
519		(let ((p seq))
520		    (set! seq (cdr seq))
521		    (set-cdr! p '())
522		    p))
523	    (else
524		'()) ))
525    (if (vector? seq)
526	(let ((n (vector-length seq))
527	      (vec seq))
528	  (set! seq (vector->list seq))
529	  (do ((p (step n) (cdr p))
530	       (i 0 (+ i 1)))
531	      ((null? p) vec)
532	    (vector-set! vec i (car p)) ))
533	;; otherwise, assume it is a list
534	(step (length seq)) ))
535
536;;; (sort sequence less?)
537;;; sorts a vector or list non-destructively.  It does this by sorting a
538;;; copy of the sequence.  My understanding is that the Standard says
539;;; that the result of append is always "newly allocated" except for
540;;; sharing structure with "the last argument", so (append x '()) ought
541;;; to be a standard way of copying a list x.
542
543(define (sort seq less?)
544    (if (vector? seq)
545	(list->vector (sort! (vector->list seq) less?))
546	(sort! (append seq '()) less?)))
547
548
549;;; Topological sort with cycle detection:
550;;
551;; A functional implementation of the algorithm described in Cormen,
552;; et al. (2009), Introduction to Algorithms (3rd ed.), pp. 612-615.
553
554(define (topological-sort dag pred)
555  (define (visit dag node edges path state)
556    (case (alist-ref node (car state) pred)
557      ((grey)
558       (abort
559        (##sys#make-structure
560         'condition
561         '(exn runtime cycle)
562         `((exn . message) "cycle detected"
563           (exn . arguments) ,(list (cons node (reverse path)))
564           (exn . call-chain) ,(get-call-chain)
565           (exn . location) topological-sort))))
566      ((black)
567       state)
568      (else
569       (let walk ((edges (or edges (alist-ref node dag pred '())))
570                  (state (cons (cons (cons node 'grey) (car state))
571                               (cdr state))))
572         (if (null? edges)
573             (cons (alist-update! node 'black (car state) pred)
574                   (cons node (cdr state)))
575             (let ((edge (car edges)))
576               (walk (cdr edges)
577                     (visit dag
578                            edge
579                            #f
580                            (cons edge path)
581                            state))))))))
582  (define normalized-dag
583    (foldl (lambda (result node)
584             (alist-update! (car node)
585                            (append (cdr node)
586                                    (or (alist-ref (car node) dag pred) '()))
587                            result
588                            pred))
589           '()
590           dag))
591  (let loop ((dag normalized-dag)
592             (state (cons (list) (list))))
593    (if (null? dag)
594        (cdr state)
595        (loop (cdr dag)
596              (visit dag
597                     (caar dag)
598                     (cdar dag)
599                     '()
600                     state)))))
601) ; chicken.sort
602
Trap