~ chicken-core (master) /tests/test-irregex.scm
Trap1;;;: test-irregex.scm
2
3
4(import (only chicken.string string-split)
5 (rename (only chicken.string string-intersperse) (string-intersperse string-join)) ;; Avoid srfi-13
6 chicken.format chicken.io chicken.irregex chicken.port)
7
8(include "test.scm")
9
10(import (only (scheme base) open-output-string get-output-string open-input-string))
11
12(define (cat . args)
13 (let ((out (open-output-string)))
14 (for-each (lambda (x) (display x out)) args)
15 (get-output-string out)))
16
17(define (warning . args)
18 (for-each (lambda (x) (display x (current-error-port))) args)
19 (newline (current-error-port)))
20
21(define (call-with-input-file file proc)
22 (let* ((in (open-input-file file))
23 (res (proc in)))
24 (close-input-port in)
25 res))
26
27(define (call-with-input-string str proc)
28 (let* ((in (open-input-string str))
29 (res (proc in)))
30 (close-input-port in)
31 res))
32
33(define (call-with-output-string proc)
34 (let ((out (open-output-string)))
35 (proc out)
36 (let ((res (get-output-string out)))
37 (close-output-port out)
38 res)))
39
40(define (port-for-each proc read . o)
41 (let ((in (if (pair? o) (car o) (current-input-port))))
42 (let lp ()
43 (let ((x (read in)))
44 (unless (eof-object? x)
45 (proc x)
46 (lp))))))
47
48(define (subst-matches matches subst)
49 (define (submatch n)
50 (if (irregex-match-data? matches)
51 (and (irregex-match-valid-index? matches n)
52 (irregex-match-substring matches n))
53 (list-ref matches n)))
54 (and
55 matches
56 (call-with-output-string
57 (lambda (out)
58 (call-with-input-string subst
59 (lambda (in)
60 (let lp ()
61 (let ((c (read-char in)))
62 (cond
63 ((not (eof-object? c))
64 (case c
65 ((#\&)
66 (display (or (submatch 0) "") out))
67 ((#\\)
68 (let ((c (read-char in)))
69 (if (char-numeric? c)
70 (let lp ((res (list c)))
71 (if (and (char? (peek-char in))
72 (char-numeric? (peek-char in)))
73 (lp (cons (read-char in) res))
74 (display
75 (or (submatch (string->number
76 (list->string (reverse res))))
77 "")
78 out)))
79 (write-char c out))))
80 (else
81 (write-char c out)))
82 (lp)))))))))))
83
84(define (test-re matcher line)
85 (let ((splt (string-split line "\t" #t)))
86 (if (list? splt)
87 (apply
88 (lambda (pattern input result subst output)
89 (let ((name (cat pattern " " input " " result " " subst)))
90 (cond
91 ((equal? "c" result)
92 (test-error name (matcher pattern input)))
93 ((equal? "n" result)
94 (test-assert name (not (matcher pattern input))))
95 (else
96 (test-equal name output
97 (subst-matches (matcher pattern input) subst))))))
98 splt)
99 (warning "invalid regex test line" line))))
100
101(test-begin "basic irregex tests")
102
103;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
104;; basic irregex
105
106(for-each
107 (lambda (opts)
108 (test-group (cat "irregex - " opts)
109 (call-with-input-file "re-tests.txt"
110 (lambda (in)
111 (port-for-each
112 (lambda (line)
113 (test-re (lambda (pat str)
114 (irregex-search (apply irregex pat opts) str))
115 line))
116 read-line
117 in)))))
118 '((backtrack)
119 (fast)
120 ))
121
122;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
123;; ;; chunked irregex
124
125(define (rope . args)
126 (map (lambda (x) (if (pair? x) x (list x 0 (string-length x)))) args))
127
128(define rope-chunker
129 (make-irregex-chunker
130 (lambda (x) (and (pair? (cdr x)) (cdr x)))
131 caar
132 cadar
133 caddar
134 (lambda (src1 i src2 j)
135 (if (eq? src1 src2)
136 (substring (caar src1) i j)
137 (let lp ((src (cdr src1))
138 (res (list (substring (caar src1) i (caddar src1)))))
139 (if (eq? src src2)
140 (string-join
141 (reverse (cons (substring (caar src2) (cadar src2) j) res))
142 "")
143 (lp (cdr src)
144 (cons (substring (caar src) (cadar src) (caddar src))
145 res))))))))
146
147(define (make-ropes str)
148 (let ((len (string-length str)))
149 (case len
150 ((0 1)
151 (list (rope str)))
152 ((2)
153 (list (rope str)
154 (rope (substring str 0 1) (substring str 1 2))))
155 ((3)
156 (list (rope str)
157 (rope (substring str 0 1) (substring str 1 3))
158 (rope (substring str 0 2) (substring str 2 3))
159 (rope (substring str 0 1)
160 (substring str 1 2)
161 (substring str 2 3))))
162 (else
163 (let ((mid (quotient (+ len 1) 2)))
164 (list (rope str)
165 (rope (substring str 0 1) (substring str 1 len))
166 (rope (substring str 0 mid) (substring str mid len))
167 (rope (substring str 0 (- len 1))
168 (substring str (- len 1) len))
169 (rope (substring str 0 1)
170 (substring str 1 mid)
171 (substring str mid len))
172 ))))))
173
174(define (make-shared-ropes str)
175 (let ((len (string-length str)))
176 (case len
177 ((0 1)
178 '())
179 ((2)
180 (list (list (list str 0 1) (list str 1 2))))
181 ((3)
182 (list (list (list str 0 1) (list str 1 3))
183 (list (list str 0 2) (list str 2 3))
184 (list (list str 0 1) (list str 1 2) (list str 2 3))))
185 (else
186 (let ((mid (quotient (+ len 1) 2)))
187 (list (list (list str 0 1) (list str 1 len))
188 (list (list str 0 mid) (list str mid len))
189 (list (list str 0 (- len 1))
190 (list str (- len 1) len))
191 (list (list str 0 1) (list str 1 mid) (list str mid len))
192 ))))))
193
194(for-each
195 (lambda (opts)
196 (test-group (cat "irregex/chunked - " opts)
197 (call-with-input-file "re-tests.txt"
198 (lambda (in)
199 (port-for-each
200 (lambda (line)
201 (let ((splt (string-split line "\t" #t)))
202 (if (list? splt)
203 (apply
204 (lambda (pattern input result subst output)
205 (let ((name
206 (cat pattern " " input " " result " " subst)))
207 (cond
208 ((equal? "c" result))
209 ((equal? "n" result)
210 (for-each
211 (lambda (rope)
212 (test-assert name
213 (not (irregex-search/chunked pattern
214 rope-chunker
215 rope))))
216 (append (make-ropes input)
217 (make-shared-ropes input))))
218 (else
219 (for-each
220 (lambda (rope)
221 (test-equal
222 name output
223 (subst-matches (irregex-search/chunked pattern
224 rope-chunker
225 rope)
226 subst)))
227 (append (make-ropes input)
228 (make-shared-ropes input)))))))
229 splt)
230 (warning "invalid regex test line" line))))
231 read-line
232 in)))))
233 '((backtrack)
234 (fast)
235 ))
236
237;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
238;; pregexp
239
240'(test-group "pregexp"
241 (call-with-input-file "re-tests.txt"
242 (lambda (in)
243 (port-for-each
244 (lambda (line) (test-re pregexp-match line))
245 read-line
246 in))))
247
248;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
249;; default regex (PCRE)
250
251'(test-group "regex"
252 (call-with-input-file "re-tests.txt"
253 (lambda (in)
254 (port-for-each
255 (lambda (line) (test-re string-search line))
256 read-line
257 in))))
258
259;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
260
261(test-group "unmatchable patterns"
262 (test-assert (not (irregex-search '(or) "abc")))
263 (test-assert (not (irregex-search '(: "ab" (or)) "abc")))
264 (test-assert (not (irregex-search '(submatch "ab" (or)) "abc")))
265 (test-assert (not (irregex-search '(: "ab" (submatch (or))) "abc")))
266 (test-assert (not (irregex-search '(/) "abc")))
267 (test-assert (not (irregex-search '(: "ab" (/)) "abc")))
268 (test-assert (not (irregex-search '(~ any) "abc")))
269 (test-assert (not (irregex-search '(: "ab" (~ any)) "abc")))
270 (test-assert (not (irregex-search '("") "abc")))
271 (test-assert (not (irregex-search '(: "ab" ("")) "abc")))
272 (test-assert (not (irregex-search '(: (+ print) white) "abc")))
273 )
274
275;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
276
277(test-group "beginning/end of chunks"
278 (test-assert
279 (irregex-search/chunked '(: bos "foo") rope-chunker '((" foo" 0 4)) 1))
280 (test-assert
281 (irregex-search/chunked '(: bos "foo") rope-chunker '((" foo" 1 5)) 2))
282 (test-assert
283 (irregex-search/chunked '(: bos "foo" eos) rope-chunker '((" foo" 1 4)) 1))
284 (test-assert
285 (irregex-search/chunked '(: bos "foo" eos) rope-chunker '((" foo" 2 5)) 2))
286 (test-assert
287 (irregex-search/chunked '(: bos "foo" eos) rope-chunker '((" foo" 0 4)) 1))
288 (test-assert
289 (irregex-search/chunked '(: bos "foo" eos) rope-chunker '((" foo" 1 5)) 2))
290 )
291
292;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
293
294(test-group "Case sensitivity"
295 (test-assert
296 (not (irregex-match '(seq "abc") "ABC")))
297 (test-assert
298 (irregex-match (irregex '(seq "abc") 'case-insensitive) "ABC"))
299 (test-assert
300 (irregex-match '(w/nocase "abc") "ABC"))
301 (test-assert
302 (not (irregex-match '(w/nocase (w/case "abc")) "ABC")))
303 (test-assert
304 (irregex-match '(w/nocase (* ("abc"))) "ABC"))
305 (test-assert
306 (not (irregex-match '(w/nocase (w/case (* ("abc")))) "ABC")))
307 (test-assert
308 (irregex-match '(w/nocase (* (/ #\a #\c))) "ABC"))
309 (test-assert
310 (not (irregex-match '(w/nocase (w/case (/ #\a #\c))) "ABC")))
311 (test-assert
312 (not (irregex-match '(w/nocase (* (~ (/ #\a #\c)))) "abc")))
313 (test-assert
314 (not (irregex-match '(w/nocase (* (~ (/ #\a #\c)))) "ABC"))))
315
316;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
317
318(test-group "API"
319 (test-group "predicates"
320 (test-assert (irregex? (irregex "a.*b")))
321 (test-assert (irregex? (irregex '(: "a" (* any) "b"))))
322 (test-assert (not (irregex? (vector '*irregex-tag* #f #f #f #f #f #f))))
323 (test-assert (not (irregex? (vector #f #f #f #f #f #f #f #f))))
324 (test-assert (irregex-match-data? (irregex-search "a.*b" "axxxb")))
325 (test-assert (irregex-match-data? (irregex-match "a.*b" "axxxb")))
326 (test-assert (not (irregex-match-data? (vector '*irregex-match-tag* #f #f #f #f #f #f #f #f #f))))
327 (test-assert (not (irregex-match-data? (vector #f #f #f #f #f #f #f #f #f #f #f)))))
328 (test-group "valid index"
329 (test-assert
330 (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") 0))
331 (test-assert
332 (not (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") 1)))
333 (test-assert
334 (not (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") -1)))
335 (test-assert
336 (irregex-match-valid-index? (irregex-search "a(.*)|(b)" "axxx") 0))
337 (test-assert
338 (irregex-match-valid-index? (irregex-search "a(.*)|(b)" "axxx") 1))
339 (test-assert
340 (irregex-match-valid-index? (irregex-search "a(.*)|(b)" "axxx") 2))
341 (test-assert
342 (irregex-match-valid-index? (irregex-search "a(.*)|(b)" "b") 2))
343 (test-assert
344 (not (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 3)))
345 (test-assert
346 (not (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") -1))))
347 (test-group "number of submatches"
348 (test-equal 0 (irregex-num-submatches (irregex "a.*b")))
349 (test-equal 1 (irregex-num-submatches (irregex "a(.*)b")))
350 (test-equal 2 (irregex-num-submatches (irregex "(a(.*))b")))
351 (test-equal 2 (irregex-num-submatches (irregex "a(.*)(b)")))
352 (test-equal 10 (irregex-num-submatches (irregex "((((((((((a))))))))))")))
353 (test-equal 0 (irregex-match-num-submatches (irregex-search "a.*b" "axxxb")))
354 (test-equal 1 (irregex-match-num-submatches (irregex-search "a(.*)b" "axxxb")))
355 (test-equal 2 (irregex-match-num-submatches (irregex-search "(a(.*))b" "axxxb")))
356 (test-equal 2 (irregex-match-num-submatches (irregex-search "a(.*)(b)" "axxxb")))
357 (test-equal 10 (irregex-match-num-submatches (irregex-search "((((((((((a))))))))))" "a"))))
358 (test-group "match substring"
359 (test-equal "axxxb" (irregex-match-substring (irregex-search "a.*b" "axxxb") 0))
360 (test-error (irregex-match-substring (irregex-search "a.*b" "axxxb") 1))
361 (test-equal "xxx" (irregex-match-substring (irregex-search "a(.*)|b" "axxx") 1))
362 (test-equal #f (irregex-match-substring (irregex-search "a(.*)|b" "b") 1))
363 (test-error (irregex-match-substring (irregex-search "a(.*)|b" "axxx") 2))
364 (test-error (irregex-match-substring (irregex-search "a(.*)|b" "b") 2)))
365 (test-group "match start-index"
366 (test-equal 0 (irregex-match-start-index (irregex-search "a.*b" "axxxb") 0))
367 (test-error (irregex-match-start-index (irregex-search "a.*b" "axxxb") 1))
368 (test-equal 1 (irregex-match-start-index (irregex-search "a(.*)|b" "axxx") 1))
369 (test-equal #f (irregex-match-start-index (irregex-search "a(.*)|b" "b") 1))
370 (test-error (irregex-match-start-index (irregex-search "a(.*)|b" "axxx") 2))
371 (test-error (irregex-match-start-index (irregex-search "a(.*)|b" "b") 2)))
372 (test-group "match end-index"
373 (test-equal 5 (irregex-match-end-index (irregex-search "a.*b" "axxxb") 0))
374 (test-error (irregex-match-end-index (irregex-search "a.*b" "axxxb") 1))
375 (test-equal 4 (irregex-match-end-index (irregex-search "a(.*)|b" "axxx") 1))
376 (test-equal #f (irregex-match-end-index (irregex-search "a(.*)|b" "b") 1))
377 (test-error (irregex-match-end-index (irregex-search "a(.*)|b" "axxx") 2))
378 (test-error (irregex-match-end-index (irregex-search "a(.*)|b" "b") 2)))
379 )
380
381;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
382
383(test-group "utils"
384 (test-equal "h*llo world"
385 (irregex-replace "[aeiou]" "hello world" "*"))
386 (test-equal "hello world"
387 (irregex-replace "[xyz]" "hello world" "*"))
388 (test-equal "h*ll* w*rld"
389 (irregex-replace/all "[aeiou]" "hello world" "*"))
390 (test-equal '("bob@test.com" "fred@example.com")
391 (irregex-fold 'email
392 (lambda (i m s) (cons (irregex-match-substring m) s))
393 '()
394 "bob@test.com and fred@example.com"
395 (lambda (i s) (reverse s))))
396 (test-equal '("bob@test.com" "fred@example.com")
397 (irregex-fold/chunked
398 'email
399 (lambda (src i m s) (cons (irregex-match-substring m) s))
400 '()
401 rope-chunker
402 (rope "bob@test.com and fred@example.com")
403 (lambda (src i s) (reverse s))))
404 (test-equal '("poo poo ")
405 (irregex-fold '(+ "poo ")
406 (lambda (i m s)
407 (if (< i (irregex-match-end-index m 0))
408 (cons (irregex-match-substring m) s)
409 s))
410 '()
411 "poo poo platter"))
412 (test-equal "* x "
413 (irregex-replace/all
414 (irregex '(: bos #\space) 'backtrack) " x " "*"))
415 (test-equal "* x "
416 (irregex-replace/all
417 (irregex '(: bos #\space) 'dfa) " x " "*"))
418 (test-equal "***x***"
419 (irregex-replace/all
420 (irregex '(: #\space) 'backtrack) " x " "*"))
421 (test-equal "***x***"
422 (irregex-replace/all
423 (irregex '(: #\space) 'dfa) " x " "*"))
424 (test-equal "A:42"
425 (irregex-replace/all "^" "42" "A:"))
426 (test-equal "A:42"
427 (irregex-replace/all 'bos "42" "A:"))
428 (test-equal "A:42"
429 (irregex-replace/all 'bol "42" "A:"))
430 (test-equal "xaac"
431 (irregex-replace/all
432 (irregex '(or (seq bos "a") (seq bos "b")) 'backtrack) "aaac" "x"))
433 (test-equal "xaac"
434 (irregex-replace/all
435 (irregex '(or (seq bos "a") (seq bos "b")) 'dfa) "aaac" "x"))
436 (test-equal "xaac"
437 (irregex-replace/all (irregex '(or (seq bos "a") "b") 'backtrack)
438 "aaac" "x"))
439 (test-equal "xaac"
440 (irregex-replace/all (irregex '(or (seq bos "a") "b") 'dfa)
441 "aaac" "x"))
442 (test-equal "*Line 1\n*Line 2"
443 (irregex-replace/all 'bol "Line 1\nLine 2" "*"))
444 (test-equal "**p*l*a*t*t*e*r"
445 (irregex-replace/all '(* "poo ") "poo poo platter" "*"))
446 (test-equal "x- y- z-"
447 (irregex-replace/all '(: (look-behind (or "x" "y" "z")) "a")
448 "xa ya za" "-"))
449 (test-equal "any gosh darned string"
450 (irregex-replace/all '(: bos (* whitespace))
451 "any gosh darned string" ""))
452 (test-equal '("foo" " " "foo" " " "b" "a" "r" " " "foo")
453 (irregex-extract '(or (: bow "foo" eow) any) "foo foo bar foo"))
454 (test-equal '("f" "o" "o" "b" "a" "r" "b" "a" "z")
455 (irregex-split (irregex "") "foobarbaz"))
456 (test-equal '("f" "b" "r" "b" "z")
457 (irregex-split (irregex "[aeiou]*") "foobarbaz"))
458 (test-equal '("" "oo" "" "a" "" "" "a" "")
459 (irregex-extract (irregex "[aeiou]*") "foobarbaz"))
460 (test-equal '("Line 1\n" "Line 2\n" "Line 3")
461 (irregex-split 'bol "Line 1\nLine 2\nLine 3"))
462 (test-equal '("foo\n" "bar\n" "baz\n")
463 (irregex-extract '(: bol (+ alpha) newline) "\nfoo\nbar\nbaz\n"))
464 (test-equal '("\nblah" "\nblah" "\nblah")
465 (irregex-extract '(: newline "blah" eol) "\nblah\nblah\nblah\n"))
466 )
467
468
469(test-group "parsing"
470 (test-equal "c+" (sre->string '(+ "c")))
471 (test-equal "(?:abc)+" (sre->string '(+ "abc")))
472 (test-equal "(?:abc|def)+" (sre->string '(+ (or "abc" "def"))))
473 (test-equal '(+ #\c) (string->sre "c+"))
474 (test-equal '(+ "abc") (string->sre "(?:abc)+"))
475 (test-equal '(+ (or "abc" "def")) (string->sre "(?:abc|def)+"))
476 )
477
478(define (extract name irx str)
479 (irregex-match-substring (irregex-match irx str) name))
480(define (valid? name irx str)
481 (irregex-match-valid-index? (irregex-match irx str) name))
482(define (start-idx name irx str)
483 (irregex-match-start-index (irregex-match irx str) name))
484(define (end-idx name irx str)
485 (irregex-match-end-index (irregex-match irx str) name))
486
487(test-group "named submatches"
488 (test-equal "matching submatch is seen and extracted"
489 "first" (extract 'first `(or (submatch-named first "first")
490 (submatch-named second "second"))
491 "first"))
492 (test-assert "matching submatch index is valid"
493 (valid? 'first `(or (submatch-named first "first")
494 (submatch-named second "second"))
495 "first"))
496 (test-equal "nonmatching submatch is known but returns false"
497 #f
498 (extract 'second `(or (submatch-named first "first")
499 (submatch-named second "second"))
500 "first"))
501 (test-assert "nonmatching submatch index is valid"
502 (valid? 'second `(or (submatch-named first "first")
503 (submatch-named second "second"))
504 "first"))
505 (test-error "nonexisting submatch is unknown and raises an error"
506 (extract 'third `(or (submatch-named first "first")
507 (submatch-named second "second"))
508 "first"))
509 (test-assert "nonexisting submatch index is invalid"
510 (not (valid? 'third `(or (submatch-named first "first")
511 (submatch-named second "second"))
512 "first")))
513 (test-equal "matching alternative is used"
514 "first" (extract 'sub `(or (submatch-named sub "first")
515 (submatch-named sub "second"))
516 "first"))
517 (test-equal "matching alternative is used (second match)"
518 "second" (extract 'sub `(or (submatch-named sub "first")
519 (submatch-named sub "second"))
520 "second"))
521 (test-equal "last match is used with multiple matches for a name"
522 "second" (extract 'sub `(seq (submatch-named sub "first")
523 space
524 (submatch-named sub "second"))
525 "first second"))
526 (test-equal "submatch start"
527 1
528 (start-idx 'xs `(seq "a" (submatch-named xs (+ "x")) "b") "axxxb"))
529 (test-error "unknown submatch start"
530 (start-idx 'xs `(seq "a" (submatch-named ys (+ "x")) "b") "axxxb"))
531 (test-equal "submatch end"
532 4 (end-idx 'xs `(seq "a" (submatch-named xs (+ "x")) "b") "axxxb"))
533 (test-error "unknown submatch start"
534 (end-idx 'xs `(seq "a" (submatch-named ys (+ "x")) "b") "axxxb")))
535
536;; This is here to help optimized implementations catch segfaults and
537;; other such problems. These calls will always return errors in plain
538;; Scheme, but only because it will try to use the invalid object in a
539;; way that's not supported by the operator. Once Scheme grows a
540;; standardized way of signaling and catching exceptions, these tests
541;; should be changed and expanded to check for specific condition types,
542;; and probably moved to the group where the procedure is being tested.
543(test-group "error handling"
544 (test-error (irregex 'invalid-sre))
545 (test-error (string->irregex 'not-a-string))
546 (test-error (sre->irregex 'invalid-sre))
547
548 (test-error (irregex-search 'not-an-irx-or-sre "foo"))
549 (test-error (irregex-search "foo" 'not-a-string))
550 (test-error (irregex-search "foo" "foo" 'not-a-number))
551 (test-error (irregex-search "foo" "foo" 0 'not-a-number))
552
553 ;; TODO: irregex-new-matches, irregex-reset-matches!
554 ;; irregex-search/matches, make-irregex-chunker?
555
556 (test-error (irregex-match-valid-index? 'not-a-match-object 0))
557 (test-error (irregex-match-start-index 'not-a-match-object 0))
558 (test-error (irregex-match-start-index (irregex-search "foo" "foo") -1))
559 (test-error (irregex-match-end-index 'not-a-match-object 0))
560 (test-error (irregex-match-end-index (irregex-search "foo" "foo") -1))
561
562 (test-error (irregex-match-start-chunk 'not-a-match-object 0))
563 (test-error (irregex-match-end-chunk 'not-a-match-object 0))
564 (test-error (irregex-match-substring 'not-a-match-object 0))
565 (test-error (irregex-match-subchunk 'not-a-match-object 0))
566 (test-error (irregex-match-num-submatches 'not-a-match-object))
567 (test-error (irregex-match-names 'not-a-match-object))
568 (test-error (irregex-num-submatches 'not-an-irx))
569 (test-error (irregex-names 'not-an-irx))
570
571 (test-error (irregex-fold 'not-an-irx (lambda x x) 0 "foo" (lambda x x) 0 3))
572 (test-error (irregex-fold "foo" 'not-a-proc 0 "foo" (lambda x x) 0 3))
573 (test-error (irregex-fold "foo" (lambda (a b) b) 0 'not-a-string
574 (lambda x x) 0 3))
575 (test-error (irregex-fold "foo" (lambda (a b) b) 0 "foo" 'not-a-proc 0 3))
576 (test-error (irregex-fold "foo" (lambda (a b) b) 0 "foo" (lambda x x)
577 'not-a-number 3))
578 (test-error (irregex-fold "foo" (lambda (a b) b) 0 "foo" (lambda x x) 0
579 'not-a-number))
580
581 (test-error (irregex-replace 'not-an-irx "str"))
582 (test-error (irregex-replace "foo" "foo" (lambda (x) 'not-a-string)))
583 (test-error (irregex-replace/all 'not-an-irx "str"))
584 (test-error (irregex-replace/all "foo" "foo" (lambda (x) 'not-a-string)))
585
586 ;; Are these supposed to be exported?
587 ;; irregex-nfa, irregex-dfa, irregex-dfa/search, irregex-dfa/extract
588 ;; irregex-flags, irregex-lengths
589 )
590
591(test-group "SRE representation edge cases"
592 ;; NFA compilation skipped alternative after empty sequence (#26, found by John Clements)
593 (test-equal "empty sequence in \"or\""
594 ""
595 (irregex-match-substring (irregex-search `(or (seq) "foo") "")))
596 (test-equal "alternative to empty sequence in \"or\""
597 "foo"
598 (irregex-match-substring (irregex-search `(or (seq) "foo") "foo"))))
599
600(test-end)
601
602
603;;; UTF-8 tests
604
605(test-begin "utf-8 tests")
606
607(test-assert (irregex-search "(?u:<..>)" "<漢字>"))
608(test-assert (irregex-search "(?u:<.*>)" "<漢字>"))
609(test-assert (irregex-search "(?u:<.+>)" "<漢字>"))
610(test-assert (not (irregex-search "(?u:<.>)" "<漢字>")))
611(test-assert (not (irregex-search "(?u:<...>)" "<漢>")))
612
613(test-assert (irregex-search "(?u:<[^a-z]*>)" "<漢字>"))
614(test-assert (not (irregex-search "(?u:<[^a-z]*>)" "<漢m字>")))
615(test-assert (irregex-search "(?u:<[^a-z][^a-z]>)" "<漢字>"))
616(test-assert (irregex-search "(?u:<あ*>)" "<あ>"))
617(test-assert (irregex-search "(?u:<あ*>)" "<ああ>"))
618(test-assert (not (irregex-search "(?u:<あ*>)" "<あxあ>")))
619
620(test-assert (irregex-search "(?u:<[あ-ん]*>)" "<あん>"))
621(test-assert (irregex-search "(?u:<[あ-ん]*>)" "<ひらがな>"))
622(test-assert (not (irregex-search "(?u:<[あ-ん]*>)" "<ひらgがな>")))
623(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>")))
624
625(test-assert (irregex-search "(?u:<[^あ-ん]*>)" "<abc>"))
626(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<あん>")))
627(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<ひらがな>")))
628(test-assert (irregex-search "(?u:<[^あ-ん語]*>)" "<abc>"))
629(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<あん>")))
630(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<ひらがな>")))
631(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>")))
632
633(test-assert (not (irregex-search (irregex "[一二]" 'utf8 #t) "三四")))
634
635(test-end)
636
637(test-exit)