~ chicken-core (chicken-5) /tests/test-irregex.scm


  1;;;: test-irregex.scm
  2
  3
  4(import (only chicken.string string-split)
  5	(rename (only chicken.string string-intersperse) (string-intersperse string-join)) ;; Avoid srfi-13
  6        chicken.format chicken.io chicken.irregex chicken.port)
  7
  8(include "test.scm")
  9
 10(define (cat . args)
 11  (let ((out (open-output-string)))
 12    (for-each (lambda (x) (display x out)) args)
 13    (get-output-string out)))
 14
 15(define (warning . args)
 16  (for-each (lambda (x) (display x (current-error-port))) args)
 17  (newline (current-error-port)))
 18
 19(define (call-with-input-file file proc)
 20  (let* ((in (open-input-file file))
 21         (res (proc in)))
 22    (close-input-port in)
 23    res))
 24
 25(define (call-with-input-string str proc)
 26  (let* ((in (open-input-string str))
 27         (res (proc in)))
 28    (close-input-port in)
 29    res))
 30
 31(define (call-with-output-string proc)
 32  (let ((out (open-output-string)))
 33    (proc out)
 34    (let ((res (get-output-string out)))
 35      (close-output-port out)
 36      res)))
 37
 38(define (port-for-each proc read . o)
 39  (let ((in (if (pair? o) (car o) (current-input-port))))
 40    (let lp ()
 41      (let ((x (read in)))
 42        (unless (eof-object? x)
 43          (proc x)
 44          (lp))))))
 45
 46(define (subst-matches matches subst)
 47  (define (submatch n)
 48    (if (irregex-match-data? matches)
 49        (and (irregex-match-valid-index? matches n)
 50             (irregex-match-substring matches n))
 51        (list-ref matches n)))
 52  (and
 53   matches
 54   (call-with-output-string
 55     (lambda (out)
 56       (call-with-input-string subst
 57         (lambda (in)
 58           (let lp ()
 59             (let ((c (read-char in)))
 60               (cond
 61                ((not (eof-object? c))
 62                 (case c
 63                   ((#\&)
 64                    (display (or (submatch 0) "") out))
 65                   ((#\\)
 66                    (let ((c (read-char in)))
 67                      (if (char-numeric? c)
 68                          (let lp ((res (list c)))
 69                            (if (and (char? (peek-char in))
 70                                     (char-numeric? (peek-char in)))
 71                                (lp (cons (read-char in) res))
 72                                (display
 73                                 (or (submatch (string->number
 74                                                (list->string (reverse res))))
 75                                     "")
 76                                 out)))
 77                          (write-char c out))))
 78                   (else
 79                    (write-char c out)))
 80                 (lp)))))))))))
 81
 82(define (test-re matcher line)
 83  (let ((splt (string-split line "\t" #t)))
 84    (if (list? splt)
 85	(apply
 86	 (lambda (pattern input result subst output)
 87	   (let ((name (cat pattern "  " input "  " result "  " subst)))
 88	     (cond
 89	      ((equal? "c" result)
 90	       (test-error name (matcher pattern input)))
 91	      ((equal? "n" result)
 92	       (test-assert name (not (matcher pattern input))))
 93	      (else
 94	       (test-equal name output
 95		     (subst-matches (matcher pattern input) subst))))))
 96	 splt)
 97	(warning "invalid regex test line" line))))
 98
 99(test-begin "basic irregex tests")
100
101;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
102;; basic irregex
103
104(for-each
105 (lambda (opts)
106   (test-group (cat "irregex - " opts)
107     (call-with-input-file "re-tests.txt"
108       (lambda (in)
109         (port-for-each
110          (lambda (line)
111            (test-re (lambda (pat str)
112                       (irregex-search (apply irregex pat opts) str))
113                     line))
114          read-line
115          in)))))
116 '((backtrack)
117   (fast)
118   ))
119
120;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
121;; ;; chunked irregex
122
123(define (rope . args)
124  (map (lambda (x) (if (pair? x) x (list x 0 (string-length x)))) args))
125
126(define rope-chunker
127  (make-irregex-chunker
128   (lambda (x) (and (pair? (cdr x)) (cdr x)))
129   caar
130   cadar
131   caddar
132   (lambda (src1 i src2 j)
133     (if (eq? src1 src2)
134         (substring (caar src1) i j)
135         (let lp ((src (cdr src1))
136                  (res (list (substring (caar src1) i (caddar src1)))))
137           (if (eq? src src2)
138               (string-join
139                (reverse (cons (substring (caar src2) (cadar src2) j) res))
140                "")
141               (lp (cdr src)
142                   (cons (substring (caar src) (cadar src) (caddar src))
143                         res))))))))
144
145(define (make-ropes str)
146  (let ((len (string-length str)))
147    (case len
148      ((0 1)
149       (list (rope str)))
150      ((2)
151       (list (rope str)
152             (rope (substring str 0 1) (substring str 1 2))))
153      ((3)
154       (list (rope str)
155             (rope (substring str 0 1) (substring str 1 3))
156             (rope (substring str 0 2) (substring str 2 3))
157             (rope (substring str 0 1)
158                   (substring str 1 2)
159                   (substring str 2 3))))
160      (else
161       (let ((mid (quotient (+ len 1) 2)))
162         (list (rope str)
163               (rope (substring str 0 1) (substring str 1 len))
164               (rope (substring str 0 mid) (substring str mid len))
165               (rope (substring str 0 (- len 1))
166                     (substring str (- len 1) len))
167               (rope (substring str 0 1)
168                     (substring str 1 mid)
169                     (substring str mid len))
170               ))))))
171
172(define (make-shared-ropes str)
173  (let ((len (string-length str)))
174    (case len
175      ((0 1)
176       '())
177      ((2)
178       (list (list (list str 0 1) (list str 1 2))))
179      ((3)
180       (list (list (list str 0 1) (list str 1 3))
181             (list (list str 0 2) (list str 2 3))
182             (list (list str 0 1) (list str 1 2) (list str 2 3))))
183      (else
184       (let ((mid (quotient (+ len 1) 2)))
185         (list (list (list str 0 1) (list str 1 len))
186               (list (list str 0 mid) (list str mid len))
187               (list (list str 0 (- len 1))
188                     (list str (- len 1) len))
189               (list (list str 0 1) (list str 1 mid) (list str mid len))
190               ))))))
191
192(for-each
193 (lambda (opts)
194   (test-group (cat "irregex/chunked - " opts)
195    (call-with-input-file "re-tests.txt"
196      (lambda (in)
197	(port-for-each
198	 (lambda (line)
199	   (let ((splt (string-split line "\t" #t)))
200	     (if (list? splt)
201		 (apply 
202		  (lambda (pattern input result subst output)
203		    (let ((name
204			   (cat pattern "  " input "  " result "  " subst)))
205		      (cond
206		       ((equal? "c" result))
207		       ((equal? "n" result)
208			(for-each
209			 (lambda (rope)
210			   (test-assert name
211					(not (irregex-search/chunked pattern
212								     rope-chunker
213								     rope))))
214			 (append (make-ropes input)
215				 (make-shared-ropes input))))
216		       (else
217			(for-each
218			 (lambda (rope)
219			   (test-equal
220			    name output
221			    (subst-matches (irregex-search/chunked pattern
222								   rope-chunker
223								   rope)
224					   subst)))
225			 (append (make-ropes input)
226				 (make-shared-ropes input)))))))
227		  splt)
228		 (warning "invalid regex test line" line))))
229	 read-line
230	 in)))))
231 '((backtrack)
232   (fast)
233   ))
234
235;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
236;; pregexp
237
238'(test-group "pregexp"
239   (call-with-input-file "re-tests.txt"
240     (lambda (in)
241       (port-for-each
242        (lambda (line) (test-re pregexp-match line))
243        read-line
244        in))))
245
246;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
247;; default regex (PCRE)
248
249'(test-group "regex"
250   (call-with-input-file "re-tests.txt"
251     (lambda (in)
252       (port-for-each
253        (lambda (line) (test-re string-search line))
254        read-line
255        in))))
256
257;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
258
259(test-group "unmatchable patterns"
260  (test-assert (not (irregex-search '(or) "abc")))
261  (test-assert (not (irregex-search '(: "ab" (or)) "abc")))
262  (test-assert (not (irregex-search '(submatch "ab" (or)) "abc")))
263  (test-assert (not (irregex-search '(: "ab" (submatch (or))) "abc")))
264  (test-assert (not (irregex-search '(/) "abc")))
265  (test-assert (not (irregex-search '(: "ab" (/)) "abc")))
266  (test-assert (not (irregex-search '(~ any) "abc")))
267  (test-assert (not (irregex-search '(: "ab" (~ any)) "abc")))
268  (test-assert (not (irregex-search '("") "abc")))
269  (test-assert (not (irregex-search '(: "ab" ("")) "abc")))
270  (test-assert (not (irregex-search '(: (+ print) white) "abc")))
271  )
272
273;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
274
275(test-group "beginning/end of chunks"
276  (test-assert
277      (irregex-search/chunked '(: bos "foo") rope-chunker '((" foo" 0 4)) 1))
278  (test-assert
279      (irregex-search/chunked '(: bos "foo") rope-chunker '(("  foo" 1 5)) 2))
280  (test-assert
281      (irregex-search/chunked '(: bos "foo" eos) rope-chunker '((" foo" 1 4)) 1))
282  (test-assert
283      (irregex-search/chunked '(: bos "foo" eos) rope-chunker '(("  foo" 2 5)) 2))
284  (test-assert
285      (irregex-search/chunked '(: bos "foo" eos) rope-chunker '((" foo" 0 4)) 1))
286  (test-assert
287      (irregex-search/chunked '(: bos "foo" eos) rope-chunker '(("  foo" 1 5)) 2))
288  )
289
290;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
291
292(test-group "Case sensitivity"
293  (test-assert
294   (not (irregex-match '(seq "abc") "ABC")))
295  (test-assert
296   (irregex-match (irregex '(seq "abc") 'case-insensitive) "ABC"))
297  (test-assert
298   (irregex-match '(w/nocase "abc") "ABC"))
299  (test-assert
300   (not (irregex-match '(w/nocase (w/case "abc")) "ABC")))
301  (test-assert
302   (irregex-match '(w/nocase (* ("abc"))) "ABC"))
303  (test-assert
304   (not (irregex-match '(w/nocase (w/case (* ("abc")))) "ABC")))
305  (test-assert
306   (irregex-match '(w/nocase (* (/ #\a #\c))) "ABC"))
307  (test-assert
308   (not (irregex-match '(w/nocase (w/case (/ #\a #\c))) "ABC")))
309  (test-assert
310   (not (irregex-match '(w/nocase (* (~ (/ #\a #\c)))) "abc")))
311  (test-assert
312   (not (irregex-match '(w/nocase (* (~ (/ #\a #\c)))) "ABC"))))
313
314;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
315
316(test-group "API"
317  (test-group "predicates"
318    (test-assert (irregex? (irregex "a.*b")))
319    (test-assert (irregex? (irregex '(: "a" (* any) "b"))))
320    (test-assert (not (irregex? (vector '*irregex-tag* #f #f #f #f #f #f))))
321    (test-assert (not (irregex? (vector #f #f #f #f #f #f #f #f))))
322    (test-assert (irregex-match-data? (irregex-search "a.*b" "axxxb")))
323    (test-assert (irregex-match-data? (irregex-match "a.*b" "axxxb")))
324    (test-assert (not (irregex-match-data? (vector '*irregex-match-tag* #f #f #f #f #f #f #f #f #f))))
325    (test-assert (not (irregex-match-data? (vector #f #f #f #f #f #f #f #f #f #f #f)))))
326  (test-group "valid index"
327    (test-assert
328     (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") 0))
329    (test-assert
330     (not (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") 1)))
331    (test-assert
332     (not (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") -1)))
333    (test-assert
334     (irregex-match-valid-index? (irregex-search "a(.*)|(b)" "axxx") 0))
335    (test-assert
336     (irregex-match-valid-index? (irregex-search "a(.*)|(b)" "axxx") 1))
337    (test-assert
338     (irregex-match-valid-index? (irregex-search "a(.*)|(b)" "axxx") 2))
339    (test-assert
340     (irregex-match-valid-index? (irregex-search "a(.*)|(b)" "b") 2))
341    (test-assert
342     (not (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 3)))
343    (test-assert
344     (not (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") -1))))
345  (test-group "number of submatches"
346    (test-equal 0 (irregex-num-submatches (irregex "a.*b")))
347    (test-equal 1 (irregex-num-submatches (irregex "a(.*)b")))
348    (test-equal 2 (irregex-num-submatches (irregex "(a(.*))b")))
349    (test-equal 2 (irregex-num-submatches (irregex "a(.*)(b)")))
350    (test-equal 10 (irregex-num-submatches (irregex "((((((((((a))))))))))")))
351    (test-equal 0 (irregex-match-num-submatches (irregex-search "a.*b" "axxxb")))
352    (test-equal 1 (irregex-match-num-submatches (irregex-search "a(.*)b" "axxxb")))
353    (test-equal 2 (irregex-match-num-submatches (irregex-search "(a(.*))b" "axxxb")))
354    (test-equal 2 (irregex-match-num-submatches (irregex-search "a(.*)(b)" "axxxb")))
355    (test-equal 10 (irregex-match-num-submatches (irregex-search "((((((((((a))))))))))" "a"))))
356  (test-group "match substring"
357    (test-equal "axxxb" (irregex-match-substring (irregex-search "a.*b" "axxxb") 0))
358    (test-error (irregex-match-substring (irregex-search "a.*b" "axxxb") 1))
359    (test-equal "xxx" (irregex-match-substring (irregex-search "a(.*)|b" "axxx") 1))
360    (test-equal #f (irregex-match-substring (irregex-search "a(.*)|b" "b") 1))
361    (test-error (irregex-match-substring (irregex-search "a(.*)|b" "axxx") 2))
362    (test-error (irregex-match-substring (irregex-search "a(.*)|b" "b") 2)))
363  (test-group "match start-index"
364    (test-equal 0 (irregex-match-start-index (irregex-search "a.*b" "axxxb") 0))
365    (test-error (irregex-match-start-index (irregex-search "a.*b" "axxxb") 1))
366    (test-equal 1 (irregex-match-start-index (irregex-search "a(.*)|b" "axxx") 1))
367    (test-equal #f (irregex-match-start-index (irregex-search "a(.*)|b" "b") 1))
368    (test-error (irregex-match-start-index (irregex-search "a(.*)|b" "axxx") 2))
369    (test-error (irregex-match-start-index (irregex-search "a(.*)|b" "b") 2)))
370  (test-group "match end-index"
371    (test-equal 5 (irregex-match-end-index (irregex-search "a.*b" "axxxb") 0))
372    (test-error (irregex-match-end-index (irregex-search "a.*b" "axxxb") 1))
373    (test-equal 4 (irregex-match-end-index (irregex-search "a(.*)|b" "axxx") 1))
374    (test-equal #f (irregex-match-end-index (irregex-search "a(.*)|b" "b") 1))
375    (test-error (irregex-match-end-index (irregex-search "a(.*)|b" "axxx") 2))
376    (test-error (irregex-match-end-index (irregex-search "a(.*)|b" "b") 2)))
377  )
378
379;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
380
381(test-group "utils"
382  (test-equal "h*llo world"
383      (irregex-replace "[aeiou]" "hello world" "*"))
384  (test-equal "hello world"
385      (irregex-replace "[xyz]" "hello world" "*"))
386  (test-equal "h*ll* w*rld"
387      (irregex-replace/all "[aeiou]" "hello world" "*"))
388  (test-equal '("bob@test.com" "fred@example.com")
389      (irregex-fold 'email
390                    (lambda (i m s) (cons (irregex-match-substring m) s))
391                    '()
392                    "bob@test.com and fred@example.com"
393                    (lambda (i s) (reverse s))))
394  (test-equal '("bob@test.com" "fred@example.com")
395      (irregex-fold/chunked
396       'email
397       (lambda (src i m s) (cons (irregex-match-substring m) s))
398       '()
399       rope-chunker
400       (rope "bob@test.com and fred@example.com")
401       (lambda (src i s) (reverse s))))
402  (test-equal '("poo poo ")
403      (irregex-fold '(+ "poo ")
404                    (lambda (i m s)
405                      (if (< i (irregex-match-end-index m 0))
406                          (cons (irregex-match-substring m) s)
407                          s))
408                    '()
409                    "poo poo platter"))
410  (test-equal "*  x   "
411      (irregex-replace/all
412       (irregex '(: bos #\space) 'backtrack) "   x   " "*"))
413  (test-equal "*  x   "
414      (irregex-replace/all
415       (irregex '(: bos #\space) 'dfa) "   x   " "*"))
416  (test-equal "***x***"
417      (irregex-replace/all
418       (irregex '(: #\space) 'backtrack) "   x   " "*"))
419  (test-equal "***x***"
420      (irregex-replace/all
421       (irregex '(: #\space) 'dfa) "   x   " "*"))
422  (test-equal "A:42"
423      (irregex-replace/all "^" "42" "A:"))
424  (test-equal "A:42"
425      (irregex-replace/all 'bos "42" "A:"))
426  (test-equal "A:42"
427      (irregex-replace/all 'bol "42" "A:"))
428  (test-equal "xaac"
429      (irregex-replace/all
430       (irregex '(or (seq bos "a") (seq bos "b")) 'backtrack) "aaac" "x"))
431  (test-equal "xaac"
432      (irregex-replace/all
433       (irregex '(or (seq bos "a") (seq bos "b")) 'dfa) "aaac" "x"))
434  (test-equal "xaac"
435      (irregex-replace/all (irregex '(or (seq bos "a") "b") 'backtrack)
436                           "aaac" "x"))
437  (test-equal "xaac"
438      (irregex-replace/all (irregex '(or (seq bos "a") "b") 'dfa)
439                           "aaac" "x"))
440  (test-equal "*Line 1\n*Line 2"
441      (irregex-replace/all 'bol "Line 1\nLine 2" "*"))
442  (test-equal "**p*l*a*t*t*e*r"
443      (irregex-replace/all '(* "poo ") "poo poo platter" "*"))
444  (test-equal "x- y- z-"
445      (irregex-replace/all '(: (look-behind (or "x" "y" "z")) "a")
446                           "xa ya za"  "-"))
447  (test-equal "any gosh darned string"
448	(irregex-replace/all '(: bos (* whitespace))
449                             "any gosh darned string"  ""))
450  (test-equal '("foo" " " "foo" " " "b" "a" "r" " " "foo")
451      (irregex-extract '(or (: bow "foo" eow) any) "foo foo bar foo"))
452  (test-equal '("f" "o" "o" "b" "a" "r" "b" "a" "z")
453      (irregex-split (irregex "") "foobarbaz"))
454  (test-equal '("f" "b" "r" "b" "z")
455      (irregex-split (irregex "[aeiou]*") "foobarbaz"))
456  (test-equal '("" "oo" "" "a" "" "" "a" "")
457      (irregex-extract (irregex "[aeiou]*") "foobarbaz"))
458  (test-equal '("Line 1\n" "Line 2\n" "Line 3")
459      (irregex-split 'bol "Line 1\nLine 2\nLine 3"))
460  (test-equal '("foo\n" "bar\n" "baz\n")
461      (irregex-extract '(: bol (+ alpha) newline) "\nfoo\nbar\nbaz\n"))
462  (test-equal '("\nblah" "\nblah" "\nblah")
463      (irregex-extract '(: newline "blah" eol) "\nblah\nblah\nblah\n"))
464  )
465
466
467(test-group "parsing"
468  (test-equal "c+" (sre->string '(+ "c")))
469  (test-equal "(?:abc)+" (sre->string '(+ "abc")))
470  (test-equal "(?:abc|def)+" (sre->string '(+ (or "abc" "def"))))
471  (test-equal '(+ #\c) (string->sre "c+"))
472  (test-equal '(+ "abc") (string->sre "(?:abc)+"))
473  (test-equal '(+ (or "abc" "def")) (string->sre "(?:abc|def)+"))
474  )
475
476(define (extract name irx str)
477  (irregex-match-substring (irregex-match irx str) name))
478(define (valid? name irx str)
479  (irregex-match-valid-index? (irregex-match irx str) name))
480(define (start-idx name irx str)
481  (irregex-match-start-index (irregex-match irx str) name))
482(define (end-idx name irx str)
483  (irregex-match-end-index (irregex-match irx str) name))
484
485(test-group "named submatches"
486  (test-equal "matching submatch is seen and extracted"
487              "first" (extract 'first `(or (submatch-named first "first")
488                                           (submatch-named second "second"))
489                               "first"))
490  (test-assert "matching submatch index is valid"
491               (valid? 'first `(or (submatch-named first "first")
492                                   (submatch-named second "second"))
493                       "first"))
494  (test-equal "nonmatching submatch is known but returns false"
495              #f
496              (extract 'second `(or (submatch-named first "first")
497                                    (submatch-named second "second"))
498                       "first"))
499  (test-assert "nonmatching submatch index is valid"
500               (valid? 'second `(or (submatch-named first "first")
501                                    (submatch-named second "second"))
502                       "first"))
503  (test-error "nonexisting submatch is unknown and raises an error"
504              (extract 'third `(or (submatch-named first "first")
505                                   (submatch-named second "second"))
506                       "first"))
507  (test-assert "nonexisting submatch index is invalid"
508               (not (valid? 'third `(or (submatch-named first "first")
509                                         (submatch-named second "second"))
510                            "first")))
511  (test-equal "matching alternative is used"
512              "first" (extract 'sub `(or (submatch-named sub "first")
513                                         (submatch-named sub "second"))
514                               "first"))
515  (test-equal "matching alternative is used (second match)"
516              "second" (extract 'sub `(or (submatch-named sub "first")
517                                          (submatch-named sub "second"))
518                                "second"))
519  (test-equal "last match is used with multiple matches for a name"
520              "second" (extract 'sub `(seq (submatch-named sub "first")
521                                           space
522                                           (submatch-named sub "second"))
523                                "first second"))
524  (test-equal "submatch start"
525              1
526              (start-idx 'xs `(seq "a" (submatch-named xs (+ "x")) "b") "axxxb"))
527  (test-error "unknown submatch start"
528              (start-idx 'xs `(seq "a" (submatch-named ys (+ "x")) "b") "axxxb"))
529  (test-equal "submatch end"
530              4 (end-idx 'xs `(seq "a" (submatch-named xs (+ "x")) "b") "axxxb"))
531  (test-error "unknown submatch start"
532              (end-idx 'xs `(seq "a" (submatch-named ys (+ "x")) "b") "axxxb")))
533
534;; This is here to help optimized implementations catch segfaults and
535;; other such problems.  These calls will always return errors in plain
536;; Scheme, but only because it will try to use the invalid object in a
537;; way that's not supported by the operator.  Once Scheme grows a
538;; standardized way of signaling and catching exceptions, these tests
539;; should be changed and expanded to check for specific condition types,
540;; and probably moved to the group where the procedure is being tested.
541(test-group "error handling"
542  (test-error (irregex 'invalid-sre))
543  (test-error (string->irregex 'not-a-string))
544  (test-error (sre->irregex 'invalid-sre))
545  
546  (test-error (irregex-search 'not-an-irx-or-sre "foo"))
547  (test-error (irregex-search "foo" 'not-a-string))
548  (test-error (irregex-search "foo" "foo" 'not-a-number))
549  (test-error (irregex-search "foo" "foo" 0 'not-a-number))
550
551  ;; TODO: irregex-new-matches, irregex-reset-matches!
552  ;; irregex-search/matches, make-irregex-chunker?
553
554  (test-error (irregex-match-valid-index? 'not-a-match-object 0))
555  (test-error (irregex-match-start-index 'not-a-match-object 0))
556  (test-error (irregex-match-start-index (irregex-search "foo" "foo") -1))
557  (test-error (irregex-match-end-index 'not-a-match-object 0))
558  (test-error (irregex-match-end-index (irregex-search "foo" "foo") -1))
559  
560  (test-error (irregex-match-start-chunk 'not-a-match-object 0))
561  (test-error (irregex-match-end-chunk 'not-a-match-object 0))
562  (test-error (irregex-match-substring 'not-a-match-object 0))
563  (test-error (irregex-match-subchunk 'not-a-match-object 0))
564  (test-error (irregex-match-num-submatches 'not-a-match-object))
565  (test-error (irregex-match-names 'not-a-match-object))
566  (test-error (irregex-num-submatches 'not-an-irx))
567  (test-error (irregex-names 'not-an-irx))
568  
569  (test-error (irregex-fold 'not-an-irx (lambda x x) 0 "foo" (lambda x x) 0 3))
570  (test-error (irregex-fold "foo" 'not-a-proc 0 "foo" (lambda x x) 0 3))
571  (test-error (irregex-fold "foo" (lambda (a b) b) 0 'not-a-string
572                            (lambda x x) 0 3))
573  (test-error (irregex-fold "foo" (lambda (a b) b) 0 "foo" 'not-a-proc 0 3))
574  (test-error (irregex-fold "foo" (lambda (a b) b) 0 "foo" (lambda x x)
575                            'not-a-number 3))
576  (test-error (irregex-fold "foo" (lambda (a b) b) 0 "foo" (lambda x x) 0
577                            'not-a-number))
578
579  (test-error (irregex-replace 'not-an-irx "str"))
580  (test-error (irregex-replace "foo" "foo" (lambda (x) 'not-a-string)))
581  (test-error (irregex-replace/all 'not-an-irx "str"))
582  (test-error (irregex-replace/all "foo" "foo" (lambda (x) 'not-a-string)))
583
584  ;; Are these supposed to be exported?
585  ;; irregex-nfa, irregex-dfa, irregex-dfa/search, irregex-dfa/extract
586  ;; irregex-flags, irregex-lengths
587  )
588
589(test-group "SRE representation edge cases"
590  ;; NFA compilation skipped alternative after empty sequence (#26, found by John Clements)
591  (test-equal "empty sequence in \"or\""
592              ""
593              (irregex-match-substring (irregex-search `(or (seq) "foo") "")))
594  (test-equal "alternative to empty sequence in \"or\""
595              "foo"
596              (irregex-match-substring (irregex-search `(or (seq) "foo") "foo"))))
597
598(test-end)
599
600
601;;; UTF-8 tests
602
603(test-begin "utf-8 tests")
604
605(test-assert (irregex-search "(?u:<..>)" "<漢字>"))
606(test-assert (irregex-search "(?u:<.*>)" "<漢字>"))
607(test-assert (irregex-search "(?u:<.+>)" "<漢字>"))
608(test-assert (not (irregex-search "(?u:<.>)" "<漢字>")))
609(test-assert (not (irregex-search "(?u:<...>)" "<漢>")))
610
611(test-assert (irregex-search "(?u:<[^a-z]*>)" "<漢字>"))
612(test-assert (not (irregex-search "(?u:<[^a-z]*>)" "<漢m字>")))
613(test-assert (irregex-search "(?u:<[^a-z][^a-z]>)" "<漢字>"))
614(test-assert (irregex-search "(?u:<あ*>)" "<あ>"))
615(test-assert (irregex-search "(?u:<あ*>)" "<ああ>"))
616(test-assert (not (irregex-search "(?u:<あ*>)" "<あxあ>")))
617
618(test-assert (irregex-search "(?u:<[あ-ん]*>)" "<あん>"))
619(test-assert (irregex-search "(?u:<[あ-ん]*>)" "<ひらがな>"))
620(test-assert (not (irregex-search "(?u:<[あ-ん]*>)" "<ひらgがな>")))
621(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>")))
622
623(test-assert (irregex-search "(?u:<[^あ-ん]*>)" "<abc>"))
624(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<あん>")))
625(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<ひらがな>")))
626(test-assert (irregex-search "(?u:<[^あ-ん語]*>)" "<abc>"))
627(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<あん>")))
628(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<ひらがな>")))
629(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>")))
630
631(test-assert (not (irregex-search (irregex "[一二]" 'utf8 #t) "三四")))
632
633(test-end)
634
635(test-exit)
Trap