~ chicken-core (master) /tests/test-irregex.scm


  1;;;: test-irregex.scm
  2
  3
  4(import (only chicken.string string-split)
  5	(rename (only chicken.string string-intersperse) (string-intersperse string-join)) ;; Avoid srfi-13
  6        chicken.format chicken.io chicken.irregex chicken.port)
  7
  8(include "test.scm")
  9              
 10(import (only (scheme base) open-output-string get-output-string open-input-string))
 11
 12(define (cat . args)
 13  (let ((out (open-output-string)))
 14    (for-each (lambda (x) (display x out)) args)
 15    (get-output-string out)))
 16
 17(define (warning . args)
 18  (for-each (lambda (x) (display x (current-error-port))) args)
 19  (newline (current-error-port)))
 20
 21(define (call-with-input-file file proc)
 22  (let* ((in (open-input-file file))
 23         (res (proc in)))
 24    (close-input-port in)
 25    res))
 26
 27(define (call-with-input-string str proc)
 28  (let* ((in (open-input-string str))
 29         (res (proc in)))
 30    (close-input-port in)
 31    res))
 32
 33(define (call-with-output-string proc)
 34  (let ((out (open-output-string)))
 35    (proc out)
 36    (let ((res (get-output-string out)))
 37      (close-output-port out)
 38      res)))
 39
 40(define (port-for-each proc read . o)
 41  (let ((in (if (pair? o) (car o) (current-input-port))))
 42    (let lp ()
 43      (let ((x (read in)))
 44        (unless (eof-object? x)
 45          (proc x)
 46          (lp))))))
 47
 48(define (subst-matches matches subst)
 49  (define (submatch n)
 50    (if (irregex-match-data? matches)
 51        (and (irregex-match-valid-index? matches n)
 52             (irregex-match-substring matches n))
 53        (list-ref matches n)))
 54  (and
 55   matches
 56   (call-with-output-string
 57     (lambda (out)
 58       (call-with-input-string subst
 59         (lambda (in)
 60           (let lp ()
 61             (let ((c (read-char in)))
 62               (cond
 63                ((not (eof-object? c))
 64                 (case c
 65                   ((#\&)
 66                    (display (or (submatch 0) "") out))
 67                   ((#\\)
 68                    (let ((c (read-char in)))
 69                      (if (char-numeric? c)
 70                          (let lp ((res (list c)))
 71                            (if (and (char? (peek-char in))
 72                                     (char-numeric? (peek-char in)))
 73                                (lp (cons (read-char in) res))
 74                                (display
 75                                 (or (submatch (string->number
 76                                                (list->string (reverse res))))
 77                                     "")
 78                                 out)))
 79                          (write-char c out))))
 80                   (else
 81                    (write-char c out)))
 82                 (lp)))))))))))
 83
 84(define (test-re matcher line)
 85  (let ((splt (string-split line "\t" #t)))
 86    (if (list? splt)
 87	(apply
 88	 (lambda (pattern input result subst output)
 89	   (let ((name (cat pattern "  " input "  " result "  " subst)))
 90	     (cond
 91	      ((equal? "c" result)
 92	       (test-error name (matcher pattern input)))
 93	      ((equal? "n" result)
 94	       (test-assert name (not (matcher pattern input))))
 95	      (else
 96	       (test-equal name output
 97		     (subst-matches (matcher pattern input) subst))))))
 98	 splt)
 99	(warning "invalid regex test line" line))))
100
101(test-begin "basic irregex tests")
102
103;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
104;; basic irregex
105
106(for-each
107 (lambda (opts)
108   (test-group (cat "irregex - " opts)
109     (call-with-input-file "re-tests.txt"
110       (lambda (in)
111         (port-for-each
112          (lambda (line)
113            (test-re (lambda (pat str)
114                       (irregex-search (apply irregex pat opts) str))
115                     line))
116          read-line
117          in)))))
118 '((backtrack)
119   (fast)
120   ))
121
122;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
123;; ;; chunked irregex
124
125(define (rope . args)
126  (map (lambda (x) (if (pair? x) x (list x 0 (string-length x)))) args))
127
128(define rope-chunker
129  (make-irregex-chunker
130   (lambda (x) (and (pair? (cdr x)) (cdr x)))
131   caar
132   cadar
133   caddar
134   (lambda (src1 i src2 j)
135     (if (eq? src1 src2)
136         (substring (caar src1) i j)
137         (let lp ((src (cdr src1))
138                  (res (list (substring (caar src1) i (caddar src1)))))
139           (if (eq? src src2)
140               (string-join
141                (reverse (cons (substring (caar src2) (cadar src2) j) res))
142                "")
143               (lp (cdr src)
144                   (cons (substring (caar src) (cadar src) (caddar src))
145                         res))))))))
146
147(define (make-ropes str)
148  (let ((len (string-length str)))
149    (case len
150      ((0 1)
151       (list (rope str)))
152      ((2)
153       (list (rope str)
154             (rope (substring str 0 1) (substring str 1 2))))
155      ((3)
156       (list (rope str)
157             (rope (substring str 0 1) (substring str 1 3))
158             (rope (substring str 0 2) (substring str 2 3))
159             (rope (substring str 0 1)
160                   (substring str 1 2)
161                   (substring str 2 3))))
162      (else
163       (let ((mid (quotient (+ len 1) 2)))
164         (list (rope str)
165               (rope (substring str 0 1) (substring str 1 len))
166               (rope (substring str 0 mid) (substring str mid len))
167               (rope (substring str 0 (- len 1))
168                     (substring str (- len 1) len))
169               (rope (substring str 0 1)
170                     (substring str 1 mid)
171                     (substring str mid len))
172               ))))))
173
174(define (make-shared-ropes str)
175  (let ((len (string-length str)))
176    (case len
177      ((0 1)
178       '())
179      ((2)
180       (list (list (list str 0 1) (list str 1 2))))
181      ((3)
182       (list (list (list str 0 1) (list str 1 3))
183             (list (list str 0 2) (list str 2 3))
184             (list (list str 0 1) (list str 1 2) (list str 2 3))))
185      (else
186       (let ((mid (quotient (+ len 1) 2)))
187         (list (list (list str 0 1) (list str 1 len))
188               (list (list str 0 mid) (list str mid len))
189               (list (list str 0 (- len 1))
190                     (list str (- len 1) len))
191               (list (list str 0 1) (list str 1 mid) (list str mid len))
192               ))))))
193
194(for-each
195 (lambda (opts)
196   (test-group (cat "irregex/chunked - " opts)
197    (call-with-input-file "re-tests.txt"
198      (lambda (in)
199	(port-for-each
200	 (lambda (line)
201	   (let ((splt (string-split line "\t" #t)))
202	     (if (list? splt)
203		 (apply 
204		  (lambda (pattern input result subst output)
205		    (let ((name
206			   (cat pattern "  " input "  " result "  " subst)))
207		      (cond
208		       ((equal? "c" result))
209		       ((equal? "n" result)
210			(for-each
211			 (lambda (rope)
212			   (test-assert name
213					(not (irregex-search/chunked pattern
214								     rope-chunker
215								     rope))))
216			 (append (make-ropes input)
217				 (make-shared-ropes input))))
218		       (else
219			(for-each
220			 (lambda (rope)
221			   (test-equal
222			    name output
223			    (subst-matches (irregex-search/chunked pattern
224								   rope-chunker
225								   rope)
226					   subst)))
227			 (append (make-ropes input)
228				 (make-shared-ropes input)))))))
229		  splt)
230		 (warning "invalid regex test line" line))))
231	 read-line
232	 in)))))
233 '((backtrack)
234   (fast)
235   ))
236
237;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
238;; pregexp
239
240'(test-group "pregexp"
241   (call-with-input-file "re-tests.txt"
242     (lambda (in)
243       (port-for-each
244        (lambda (line) (test-re pregexp-match line))
245        read-line
246        in))))
247
248;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
249;; default regex (PCRE)
250
251'(test-group "regex"
252   (call-with-input-file "re-tests.txt"
253     (lambda (in)
254       (port-for-each
255        (lambda (line) (test-re string-search line))
256        read-line
257        in))))
258
259;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
260
261(test-group "unmatchable patterns"
262  (test-assert (not (irregex-search '(or) "abc")))
263  (test-assert (not (irregex-search '(: "ab" (or)) "abc")))
264  (test-assert (not (irregex-search '(submatch "ab" (or)) "abc")))
265  (test-assert (not (irregex-search '(: "ab" (submatch (or))) "abc")))
266  (test-assert (not (irregex-search '(/) "abc")))
267  (test-assert (not (irregex-search '(: "ab" (/)) "abc")))
268  (test-assert (not (irregex-search '(~ any) "abc")))
269  (test-assert (not (irregex-search '(: "ab" (~ any)) "abc")))
270  (test-assert (not (irregex-search '("") "abc")))
271  (test-assert (not (irregex-search '(: "ab" ("")) "abc")))
272  (test-assert (not (irregex-search '(: (+ print) white) "abc")))
273  )
274
275;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
276
277(test-group "beginning/end of chunks"
278  (test-assert
279      (irregex-search/chunked '(: bos "foo") rope-chunker '((" foo" 0 4)) 1))
280  (test-assert
281      (irregex-search/chunked '(: bos "foo") rope-chunker '(("  foo" 1 5)) 2))
282  (test-assert
283      (irregex-search/chunked '(: bos "foo" eos) rope-chunker '((" foo" 1 4)) 1))
284  (test-assert
285      (irregex-search/chunked '(: bos "foo" eos) rope-chunker '(("  foo" 2 5)) 2))
286  (test-assert
287      (irregex-search/chunked '(: bos "foo" eos) rope-chunker '((" foo" 0 4)) 1))
288  (test-assert
289      (irregex-search/chunked '(: bos "foo" eos) rope-chunker '(("  foo" 1 5)) 2))
290  )
291
292;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
293
294(test-group "Case sensitivity"
295  (test-assert
296   (not (irregex-match '(seq "abc") "ABC")))
297  (test-assert
298   (irregex-match (irregex '(seq "abc") 'case-insensitive) "ABC"))
299  (test-assert
300   (irregex-match '(w/nocase "abc") "ABC"))
301  (test-assert
302   (not (irregex-match '(w/nocase (w/case "abc")) "ABC")))
303  (test-assert
304   (irregex-match '(w/nocase (* ("abc"))) "ABC"))
305  (test-assert
306   (not (irregex-match '(w/nocase (w/case (* ("abc")))) "ABC")))
307  (test-assert
308   (irregex-match '(w/nocase (* (/ #\a #\c))) "ABC"))
309  (test-assert
310   (not (irregex-match '(w/nocase (w/case (/ #\a #\c))) "ABC")))
311  (test-assert
312   (not (irregex-match '(w/nocase (* (~ (/ #\a #\c)))) "abc")))
313  (test-assert
314   (not (irregex-match '(w/nocase (* (~ (/ #\a #\c)))) "ABC"))))
315
316;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
317
318(test-group "API"
319  (test-group "predicates"
320    (test-assert (irregex? (irregex "a.*b")))
321    (test-assert (irregex? (irregex '(: "a" (* any) "b"))))
322    (test-assert (not (irregex? (vector '*irregex-tag* #f #f #f #f #f #f))))
323    (test-assert (not (irregex? (vector #f #f #f #f #f #f #f #f))))
324    (test-assert (irregex-match-data? (irregex-search "a.*b" "axxxb")))
325    (test-assert (irregex-match-data? (irregex-match "a.*b" "axxxb")))
326    (test-assert (not (irregex-match-data? (vector '*irregex-match-tag* #f #f #f #f #f #f #f #f #f))))
327    (test-assert (not (irregex-match-data? (vector #f #f #f #f #f #f #f #f #f #f #f)))))
328  (test-group "valid index"
329    (test-assert
330     (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") 0))
331    (test-assert
332     (not (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") 1)))
333    (test-assert
334     (not (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") -1)))
335    (test-assert
336     (irregex-match-valid-index? (irregex-search "a(.*)|(b)" "axxx") 0))
337    (test-assert
338     (irregex-match-valid-index? (irregex-search "a(.*)|(b)" "axxx") 1))
339    (test-assert
340     (irregex-match-valid-index? (irregex-search "a(.*)|(b)" "axxx") 2))
341    (test-assert
342     (irregex-match-valid-index? (irregex-search "a(.*)|(b)" "b") 2))
343    (test-assert
344     (not (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 3)))
345    (test-assert
346     (not (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") -1))))
347  (test-group "number of submatches"
348    (test-equal 0 (irregex-num-submatches (irregex "a.*b")))
349    (test-equal 1 (irregex-num-submatches (irregex "a(.*)b")))
350    (test-equal 2 (irregex-num-submatches (irregex "(a(.*))b")))
351    (test-equal 2 (irregex-num-submatches (irregex "a(.*)(b)")))
352    (test-equal 10 (irregex-num-submatches (irregex "((((((((((a))))))))))")))
353    (test-equal 0 (irregex-match-num-submatches (irregex-search "a.*b" "axxxb")))
354    (test-equal 1 (irregex-match-num-submatches (irregex-search "a(.*)b" "axxxb")))
355    (test-equal 2 (irregex-match-num-submatches (irregex-search "(a(.*))b" "axxxb")))
356    (test-equal 2 (irregex-match-num-submatches (irregex-search "a(.*)(b)" "axxxb")))
357    (test-equal 10 (irregex-match-num-submatches (irregex-search "((((((((((a))))))))))" "a"))))
358  (test-group "match substring"
359    (test-equal "axxxb" (irregex-match-substring (irregex-search "a.*b" "axxxb") 0))
360    (test-error (irregex-match-substring (irregex-search "a.*b" "axxxb") 1))
361    (test-equal "xxx" (irregex-match-substring (irregex-search "a(.*)|b" "axxx") 1))
362    (test-equal #f (irregex-match-substring (irregex-search "a(.*)|b" "b") 1))
363    (test-error (irregex-match-substring (irregex-search "a(.*)|b" "axxx") 2))
364    (test-error (irregex-match-substring (irregex-search "a(.*)|b" "b") 2)))
365  (test-group "match start-index"
366    (test-equal 0 (irregex-match-start-index (irregex-search "a.*b" "axxxb") 0))
367    (test-error (irregex-match-start-index (irregex-search "a.*b" "axxxb") 1))
368    (test-equal 1 (irregex-match-start-index (irregex-search "a(.*)|b" "axxx") 1))
369    (test-equal #f (irregex-match-start-index (irregex-search "a(.*)|b" "b") 1))
370    (test-error (irregex-match-start-index (irregex-search "a(.*)|b" "axxx") 2))
371    (test-error (irregex-match-start-index (irregex-search "a(.*)|b" "b") 2)))
372  (test-group "match end-index"
373    (test-equal 5 (irregex-match-end-index (irregex-search "a.*b" "axxxb") 0))
374    (test-error (irregex-match-end-index (irregex-search "a.*b" "axxxb") 1))
375    (test-equal 4 (irregex-match-end-index (irregex-search "a(.*)|b" "axxx") 1))
376    (test-equal #f (irregex-match-end-index (irregex-search "a(.*)|b" "b") 1))
377    (test-error (irregex-match-end-index (irregex-search "a(.*)|b" "axxx") 2))
378    (test-error (irregex-match-end-index (irregex-search "a(.*)|b" "b") 2)))
379  )
380
381;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
382
383(test-group "utils"
384  (test-equal "h*llo world"
385      (irregex-replace "[aeiou]" "hello world" "*"))
386  (test-equal "hello world"
387      (irregex-replace "[xyz]" "hello world" "*"))
388  (test-equal "h*ll* w*rld"
389      (irregex-replace/all "[aeiou]" "hello world" "*"))
390  (test-equal '("bob@test.com" "fred@example.com")
391      (irregex-fold 'email
392                    (lambda (i m s) (cons (irregex-match-substring m) s))
393                    '()
394                    "bob@test.com and fred@example.com"
395                    (lambda (i s) (reverse s))))
396  (test-equal '("bob@test.com" "fred@example.com")
397      (irregex-fold/chunked
398       'email
399       (lambda (src i m s) (cons (irregex-match-substring m) s))
400       '()
401       rope-chunker
402       (rope "bob@test.com and fred@example.com")
403       (lambda (src i s) (reverse s))))
404  (test-equal '("poo poo ")
405      (irregex-fold '(+ "poo ")
406                    (lambda (i m s)
407                      (if (< i (irregex-match-end-index m 0))
408                          (cons (irregex-match-substring m) s)
409                          s))
410                    '()
411                    "poo poo platter"))
412  (test-equal "*  x   "
413      (irregex-replace/all
414       (irregex '(: bos #\space) 'backtrack) "   x   " "*"))
415  (test-equal "*  x   "
416      (irregex-replace/all
417       (irregex '(: bos #\space) 'dfa) "   x   " "*"))
418  (test-equal "***x***"
419      (irregex-replace/all
420       (irregex '(: #\space) 'backtrack) "   x   " "*"))
421  (test-equal "***x***"
422      (irregex-replace/all
423       (irregex '(: #\space) 'dfa) "   x   " "*"))
424  (test-equal "A:42"
425      (irregex-replace/all "^" "42" "A:"))
426  (test-equal "A:42"
427      (irregex-replace/all 'bos "42" "A:"))
428  (test-equal "A:42"
429      (irregex-replace/all 'bol "42" "A:"))
430  (test-equal "xaac"
431      (irregex-replace/all
432       (irregex '(or (seq bos "a") (seq bos "b")) 'backtrack) "aaac" "x"))
433  (test-equal "xaac"
434      (irregex-replace/all
435       (irregex '(or (seq bos "a") (seq bos "b")) 'dfa) "aaac" "x"))
436  (test-equal "xaac"
437      (irregex-replace/all (irregex '(or (seq bos "a") "b") 'backtrack)
438                           "aaac" "x"))
439  (test-equal "xaac"
440      (irregex-replace/all (irregex '(or (seq bos "a") "b") 'dfa)
441                           "aaac" "x"))
442  (test-equal "*Line 1\n*Line 2"
443      (irregex-replace/all 'bol "Line 1\nLine 2" "*"))
444  (test-equal "**p*l*a*t*t*e*r"
445      (irregex-replace/all '(* "poo ") "poo poo platter" "*"))
446  (test-equal "x- y- z-"
447      (irregex-replace/all '(: (look-behind (or "x" "y" "z")) "a")
448                           "xa ya za"  "-"))
449  (test-equal "any gosh darned string"
450	(irregex-replace/all '(: bos (* whitespace))
451                             "any gosh darned string"  ""))
452  (test-equal '("foo" " " "foo" " " "b" "a" "r" " " "foo")
453      (irregex-extract '(or (: bow "foo" eow) any) "foo foo bar foo"))
454  (test-equal '("f" "o" "o" "b" "a" "r" "b" "a" "z")
455      (irregex-split (irregex "") "foobarbaz"))
456  (test-equal '("f" "b" "r" "b" "z")
457      (irregex-split (irregex "[aeiou]*") "foobarbaz"))
458  (test-equal '("" "oo" "" "a" "" "" "a" "")
459      (irregex-extract (irregex "[aeiou]*") "foobarbaz"))
460  (test-equal '("Line 1\n" "Line 2\n" "Line 3")
461      (irregex-split 'bol "Line 1\nLine 2\nLine 3"))
462  (test-equal '("foo\n" "bar\n" "baz\n")
463      (irregex-extract '(: bol (+ alpha) newline) "\nfoo\nbar\nbaz\n"))
464  (test-equal '("\nblah" "\nblah" "\nblah")
465      (irregex-extract '(: newline "blah" eol) "\nblah\nblah\nblah\n"))
466  )
467
468
469(test-group "parsing"
470  (test-equal "c+" (sre->string '(+ "c")))
471  (test-equal "(?:abc)+" (sre->string '(+ "abc")))
472  (test-equal "(?:abc|def)+" (sre->string '(+ (or "abc" "def"))))
473  (test-equal '(+ #\c) (string->sre "c+"))
474  (test-equal '(+ "abc") (string->sre "(?:abc)+"))
475  (test-equal '(+ (or "abc" "def")) (string->sre "(?:abc|def)+"))
476  )
477
478(define (extract name irx str)
479  (irregex-match-substring (irregex-match irx str) name))
480(define (valid? name irx str)
481  (irregex-match-valid-index? (irregex-match irx str) name))
482(define (start-idx name irx str)
483  (irregex-match-start-index (irregex-match irx str) name))
484(define (end-idx name irx str)
485  (irregex-match-end-index (irregex-match irx str) name))
486
487(test-group "named submatches"
488  (test-equal "matching submatch is seen and extracted"
489              "first" (extract 'first `(or (submatch-named first "first")
490                                           (submatch-named second "second"))
491                               "first"))
492  (test-assert "matching submatch index is valid"
493               (valid? 'first `(or (submatch-named first "first")
494                                   (submatch-named second "second"))
495                       "first"))
496  (test-equal "nonmatching submatch is known but returns false"
497              #f
498              (extract 'second `(or (submatch-named first "first")
499                                    (submatch-named second "second"))
500                       "first"))
501  (test-assert "nonmatching submatch index is valid"
502               (valid? 'second `(or (submatch-named first "first")
503                                    (submatch-named second "second"))
504                       "first"))
505  (test-error "nonexisting submatch is unknown and raises an error"
506              (extract 'third `(or (submatch-named first "first")
507                                   (submatch-named second "second"))
508                       "first"))
509  (test-assert "nonexisting submatch index is invalid"
510               (not (valid? 'third `(or (submatch-named first "first")
511                                         (submatch-named second "second"))
512                            "first")))
513  (test-equal "matching alternative is used"
514              "first" (extract 'sub `(or (submatch-named sub "first")
515                                         (submatch-named sub "second"))
516                               "first"))
517  (test-equal "matching alternative is used (second match)"
518              "second" (extract 'sub `(or (submatch-named sub "first")
519                                          (submatch-named sub "second"))
520                                "second"))
521  (test-equal "last match is used with multiple matches for a name"
522              "second" (extract 'sub `(seq (submatch-named sub "first")
523                                           space
524                                           (submatch-named sub "second"))
525                                "first second"))
526  (test-equal "submatch start"
527              1
528              (start-idx 'xs `(seq "a" (submatch-named xs (+ "x")) "b") "axxxb"))
529  (test-error "unknown submatch start"
530              (start-idx 'xs `(seq "a" (submatch-named ys (+ "x")) "b") "axxxb"))
531  (test-equal "submatch end"
532              4 (end-idx 'xs `(seq "a" (submatch-named xs (+ "x")) "b") "axxxb"))
533  (test-error "unknown submatch start"
534              (end-idx 'xs `(seq "a" (submatch-named ys (+ "x")) "b") "axxxb")))
535
536;; This is here to help optimized implementations catch segfaults and
537;; other such problems.  These calls will always return errors in plain
538;; Scheme, but only because it will try to use the invalid object in a
539;; way that's not supported by the operator.  Once Scheme grows a
540;; standardized way of signaling and catching exceptions, these tests
541;; should be changed and expanded to check for specific condition types,
542;; and probably moved to the group where the procedure is being tested.
543(test-group "error handling"
544  (test-error (irregex 'invalid-sre))
545  (test-error (string->irregex 'not-a-string))
546  (test-error (sre->irregex 'invalid-sre))
547  
548  (test-error (irregex-search 'not-an-irx-or-sre "foo"))
549  (test-error (irregex-search "foo" 'not-a-string))
550  (test-error (irregex-search "foo" "foo" 'not-a-number))
551  (test-error (irregex-search "foo" "foo" 0 'not-a-number))
552
553  ;; TODO: irregex-new-matches, irregex-reset-matches!
554  ;; irregex-search/matches, make-irregex-chunker?
555
556  (test-error (irregex-match-valid-index? 'not-a-match-object 0))
557  (test-error (irregex-match-start-index 'not-a-match-object 0))
558  (test-error (irregex-match-start-index (irregex-search "foo" "foo") -1))
559  (test-error (irregex-match-end-index 'not-a-match-object 0))
560  (test-error (irregex-match-end-index (irregex-search "foo" "foo") -1))
561  
562  (test-error (irregex-match-start-chunk 'not-a-match-object 0))
563  (test-error (irregex-match-end-chunk 'not-a-match-object 0))
564  (test-error (irregex-match-substring 'not-a-match-object 0))
565  (test-error (irregex-match-subchunk 'not-a-match-object 0))
566  (test-error (irregex-match-num-submatches 'not-a-match-object))
567  (test-error (irregex-match-names 'not-a-match-object))
568  (test-error (irregex-num-submatches 'not-an-irx))
569  (test-error (irregex-names 'not-an-irx))
570  
571  (test-error (irregex-fold 'not-an-irx (lambda x x) 0 "foo" (lambda x x) 0 3))
572  (test-error (irregex-fold "foo" 'not-a-proc 0 "foo" (lambda x x) 0 3))
573  (test-error (irregex-fold "foo" (lambda (a b) b) 0 'not-a-string
574                            (lambda x x) 0 3))
575  (test-error (irregex-fold "foo" (lambda (a b) b) 0 "foo" 'not-a-proc 0 3))
576  (test-error (irregex-fold "foo" (lambda (a b) b) 0 "foo" (lambda x x)
577                            'not-a-number 3))
578  (test-error (irregex-fold "foo" (lambda (a b) b) 0 "foo" (lambda x x) 0
579                            'not-a-number))
580
581  (test-error (irregex-replace 'not-an-irx "str"))
582  (test-error (irregex-replace "foo" "foo" (lambda (x) 'not-a-string)))
583  (test-error (irregex-replace/all 'not-an-irx "str"))
584  (test-error (irregex-replace/all "foo" "foo" (lambda (x) 'not-a-string)))
585
586  ;; Are these supposed to be exported?
587  ;; irregex-nfa, irregex-dfa, irregex-dfa/search, irregex-dfa/extract
588  ;; irregex-flags, irregex-lengths
589  )
590
591(test-group "SRE representation edge cases"
592  ;; NFA compilation skipped alternative after empty sequence (#26, found by John Clements)
593  (test-equal "empty sequence in \"or\""
594              ""
595              (irregex-match-substring (irregex-search `(or (seq) "foo") "")))
596  (test-equal "alternative to empty sequence in \"or\""
597              "foo"
598              (irregex-match-substring (irregex-search `(or (seq) "foo") "foo"))))
599
600(test-end)
601
602
603;;; UTF-8 tests
604
605(test-begin "utf-8 tests")
606
607(test-assert (irregex-search "(?u:<..>)" "<漢字>"))
608(test-assert (irregex-search "(?u:<.*>)" "<漢字>"))
609(test-assert (irregex-search "(?u:<.+>)" "<漢字>"))
610(test-assert (not (irregex-search "(?u:<.>)" "<漢字>")))
611(test-assert (not (irregex-search "(?u:<...>)" "<漢>")))
612
613(test-assert (irregex-search "(?u:<[^a-z]*>)" "<漢字>"))
614(test-assert (not (irregex-search "(?u:<[^a-z]*>)" "<漢m字>")))
615(test-assert (irregex-search "(?u:<[^a-z][^a-z]>)" "<漢字>"))
616(test-assert (irregex-search "(?u:<あ*>)" "<あ>"))
617(test-assert (irregex-search "(?u:<あ*>)" "<ああ>"))
618(test-assert (not (irregex-search "(?u:<あ*>)" "<あxあ>")))
619
620(test-assert (irregex-search "(?u:<[あ-ん]*>)" "<あん>"))
621(test-assert (irregex-search "(?u:<[あ-ん]*>)" "<ひらがな>"))
622(test-assert (not (irregex-search "(?u:<[あ-ん]*>)" "<ひらgがな>")))
623(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>")))
624
625(test-assert (irregex-search "(?u:<[^あ-ん]*>)" "<abc>"))
626(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<あん>")))
627(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<ひらがな>")))
628(test-assert (irregex-search "(?u:<[^あ-ん語]*>)" "<abc>"))
629(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<あん>")))
630(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<ひらがな>")))
631(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>")))
632
633(test-assert (not (irregex-search (irregex "[一二]" 'utf8 #t) "三四")))
634
635(test-end)
636
637(test-exit)
Trap