~ chicken-core (chicken-5) 139f7e9cdba897bc0969e761aede66218fcabb11
commit 139f7e9cdba897bc0969e761aede66218fcabb11
Author: Peter Bex <peter.bex@xs4all.nl>
AuthorDate: Tue Sep 13 18:36:01 2011 +0200
Commit: Peter Bex <peter.bex@xs4all.nl>
CommitDate: Tue Sep 13 18:36:01 2011 +0200
Add torture test for numerical syntax and fix all failing testcases.
Invalid syntax that used to be OK, like "+inf" and "nan", is currently
still accepted but marked deprecated and the tests commented out.
In the next release
diff --git a/NEWS b/NEWS
index 3fd1332c..450c3c64 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,10 @@
- Runtime system
- fixed handling of "inf" and nan" floating-point predicates for Solaris
(thanks to Claude Marinier)
+ - deprecated "[+-]nan", "[+-]inf" and other notations "accidentally"
+ accepted by Chicken due to the underlying C library's strtod() function,
+ standardizing on "[+-]nan.0" and "[+-]inf.0" from R6RS (and soon R7RS),
+ when displaying numbers only these forms are generated now.
- support for re-loading of compiled files has now been completely
removed
diff --git a/runtime.c b/runtime.c
index 399713eb..5f8a0caa 100644
--- a/runtime.c
+++ b/runtime.c
@@ -7196,9 +7196,9 @@ void C_ccall C_quotient(C_word c, C_word closure, C_word k, C_word n1, C_word n2
C_regparm C_word C_fcall
C_a_i_string_to_number(C_word **a, int c, C_word str, C_word radix0)
{
- int radix, radixpf = 0, sharpf = 0, ratp = 0, exactf, exactpf = 0, periodf = 0;
+ int radix, radixpf = 0, sharpf = 0, ratf = 0, exactf, exactpf = 0, periodf = 0, expf = 0;
C_word n1, n;
- C_char *sptr, *eptr;
+ C_char *sptr, *eptr, *rptr;
double fn1, fn;
if(radix0 & C_FIXNUM_BIT) radix = C_unfix(radix0);
@@ -7219,47 +7219,73 @@ C_a_i_string_to_number(C_word **a, int c, C_word str, C_word radix0)
buffer[ n ] = '\0';
while(*sptr == '#') {
- switch(*(++sptr)) {
+ switch(C_tolower((int)*(++sptr))) {
case 'b': if(radixpf) goto fail; else { radix = 2; radixpf = 1; } break;
case 'o': if(radixpf) goto fail; else { radix = 8; radixpf = 1; } break;
case 'd': if(radixpf) goto fail; else { radix = 10; radixpf = 1; } break;
case 'x': if(radixpf) goto fail; else { radix = 16; radixpf = 1; } break;
case 'e': if(exactpf) goto fail; else { exactf = 1; exactpf = 1; } break;
case 'i': if(exactpf) goto fail; else { exactf = 0; exactpf = 1; } break;
- default: --sptr;
+ default: goto fail; /* Unknown prefix type */
}
++sptr;
}
-
- /* check for embedded '#'s and double '.'s: */
- for(eptr = sptr; *eptr != '\0'; ++eptr) {
- switch(*eptr) {
+
+ /* Scan for embedded special characters and do basic sanity checking: */
+ for(eptr = sptr, rptr = sptr; *eptr != '\0'; ++eptr) {
+ switch(C_tolower((int)*eptr)) {
case '.':
- if(periodf) goto fail;
+ if(periodf || ratf || expf) goto fail;
periodf = 1;
break;
case '#':
- if(eptr[ 1 ] == '\0' || C_strchr("#.0123456789", eptr[ 1 ]) != NULL) {
- sharpf = 1;
- *eptr = '0';
- }
- else goto fail;
+ if (expf || (eptr == rptr) ||
+ (!sharpf && (eptr == rptr+1) && (C_strchr("+-.", *rptr) != NULL)))
+ goto fail;
+
+ sharpf = 1;
+ *eptr = '0';
+
+ break;
+ case '/':
+ if(periodf || ratf || expf || eptr == sptr) goto fail;
+ sharpf = 0; /* Allow sharp signs in the denominator */
+ ratf = 1;
+ rptr = eptr+1;
+ break;
+ case 'e':
+ case 'd':
+ case 'f':
+ case 'l':
+ case 's':
+ /* Don't set exp flag if we see the "f" in "inf.0" (preceded by 'n') */
+ /* Other failure modes are handled elsewhere. */
+ if(radix == 10 && eptr > sptr && C_tolower((int)*(eptr-1)) != 'n') {
+ if (ratf) goto fail;
+
+ expf = 1;
+ sharpf = 0;
+ *eptr = 'e'; /* strtod() normally only understands 'e', not dfls */
+ }
+ break;
+ default:
+ if(sharpf) goto fail;
break;
}
}
-
+ if (eptr == rptr) goto fail; /* Disallow "empty" numbers like "#x" and "1/" */
+
/* check for rational representation: */
- if((eptr = C_strchr(sptr, '/')) != NULL) {
- if (eptr == sptr) {
- n = C_SCHEME_FALSE;
- goto fini;
+ if(rptr != sptr) {
+ if (*(rptr) == '-' || *(rptr) == '+') {
+ n = C_SCHEME_FALSE;
+ goto fini;
}
- *eptr = '\0';
- ratp = 1;
+ *(rptr-1) = '\0';
switch(convert_string_to_number(sptr, radix, &n1, &fn1)) {
case 0:
@@ -7273,7 +7299,7 @@ C_a_i_string_to_number(C_word **a, int c, C_word str, C_word radix0)
/* case 2: nop */
}
- sptr = eptr + 1;
+ sptr = rptr;
}
/* convert number and return result: */
@@ -7283,8 +7309,8 @@ C_a_i_string_to_number(C_word **a, int c, C_word str, C_word radix0)
break;
case 1: /* fixnum */
- if(sharpf || ratp || (exactpf && !exactf)) {
- n = C_flonum(a, ratp ? fn1 / (double)n : (double)n);
+ if(sharpf || ratf || (exactpf && !exactf)) {
+ n = C_flonum(a, ratf ? fn1 / (double)n : (double)n);
if(exactpf && exactf) n = C_i_inexact_to_exact(n);
}
@@ -7293,7 +7319,7 @@ C_a_i_string_to_number(C_word **a, int c, C_word str, C_word radix0)
break;
case 2: /* flonum */
- n = C_flonum(a, ratp ? fn1 / fn : fn);
+ n = C_flonum(a, ratf ? fn1 / fn : fn);
if(exactpf && exactf) n = C_i_inexact_to_exact(n);
@@ -7354,28 +7380,35 @@ C_regparm C_word C_fcall convert_string_to_number(C_char *str, int radix, C_word
C_word n;
C_char *eptr, *eptr2;
double fn;
-#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__OpenBSD__)
int len = C_strlen(str);
- if(len >= 4) {
- if(!C_strncmp(str, "+nan.0", len)) {
- *flo = 0.0/0.0;
- return 2;
- }
- else if(!C_strncmp(str, "-nan.0", len)) {
- *flo = -0.0/0.0;
- return 2;
- }
- else if(!C_strncmp(str, "+inf.0", len)) {
- *flo = 1.0/0.0;
- return 2;
+ if(radix == 10) {
+ if (len >= 4 && len <= 6) { /* DEPRECATED, TODO: Change to (len == 4) */
+ if((*str == '+' || *str == '-') &&
+ C_strchr("inIN", *(str+1)) != NULL &&
+ C_strchr("naNA", *(str+2)) != NULL &&
+ C_strchr("fnFN", *(str+3)) != NULL &&
+ /* DEPRECATED, TODO: Rip out len checks */
+ (len == 4 || *(str+4) == '.') && (len == 5 || (*(str+5) == '0'))) {
+ if (*(str+1) == 'i' || *(str+1) == 'I') /* Inf */
+ *flo = 1.0/0.0;
+ else /* NaN */
+ *flo = 0.0/0.0;
+ if (*str == '-')
+ *flo *= -1.0;
+ return 2;
+ }
}
- else if(!C_strncmp(str, "-inf.0", len)) {
- *flo = -1.0/0.0;
- return 2;
+ /* DEPRECATED (enable in next release) */
+#if 0
+ /* This is disabled during the deprecation period of "+nan" syntax */
+ /* Prevent C parser from accepting things like "-inf" on its own... */
+ for(n = 0; n < len; ++n) {
+ if (C_strchr("+-0123456789e.", *(str+n)) == NULL)
+ return 0;
}
- }
#endif
+ }
if(C_strpbrk(str, "xX\0") != NULL) return 0;
@@ -7505,7 +7538,6 @@ void C_ccall C_number_to_string(C_word c, C_word closure, C_word k, C_word num,
}
}
-#if defined(__CYGWIN__) || defined(__MINGW32__)
if(C_isnan(f)) {
C_strcpy(p = buffer, "+nan.0");
goto fini;
@@ -7514,7 +7546,6 @@ void C_ccall C_number_to_string(C_word c, C_word closure, C_word k, C_word num,
C_sprintf(p = buffer, "%cinf.0", f > 0 ? '+' : '-');
goto fini;
}
-#endif
#ifdef HAVE_GCVT
p = C_gcvt(f, flonum_print_precision, buffer); /* p unused, but we want to avoid stupid warnings */
diff --git a/tests/numbers-string-conversion-tests.scm b/tests/numbers-string-conversion-tests.scm
new file mode 100644
index 00000000..c6e1d3ca
--- /dev/null
+++ b/tests/numbers-string-conversion-tests.scm
@@ -0,0 +1,280 @@
+;;;
+;;; Numerical syntax "torture test"
+;;;
+;;; This tries to test a lot of edge cases in Scheme's numerical syntax.
+;;;
+;;; Output is written so that if you run it through "grep ERROR" it will
+;;; output nothing (and exit status will be nonzero) if there are no errors.
+;;; If you run it through "tail -n 1" you will just get the total error summary.
+;;;
+;;; This code assumes that string->number accepts numbers with embedded radix
+;;; specifiers (R5RS mentions that it's allowed to return #f in those cases).
+;;; It also doesn't try to support Schemes which support *only* integers or
+;;; *only* flonums (which is also allowed by R5RS).
+;;;
+
+(define the-nan (fp/ 0.0 0.0))
+(define pos-inf (fp/ 1.0 0.0))
+(define neg-inf (fp/ -1.0 0.0))
+
+(define (nan? x) (and (number? x) (not (= x x))))
+
+(define total-errors 0)
+
+;; Here comes a horrible nasty hack. It seems to work though ;)
+(define-syntax test-numbers
+ (syntax-rules (compnums fractions)
+ ((_ (str value ...) rest ...)
+ (begin
+ (let ((res (string->number str)))
+ (if (not (or (and (not (string? value)) (equal? res value)) ...
+ (and res (nan? res) (or (and value (nan? value)) ...))))
+ (begin (display "PARSE ERROR ")
+ (write '(str value ...))
+ (display " => ") (write res) (newline)
+ (set! total-errors (+ total-errors 1)))
+ (let ((re-str (and res (number->string res))))
+ (if (not (or (and res (string=? re-str str))
+ (and (not res) (not value)) ...
+ (and res (string? value) (string=? re-str value)) ...))
+ (begin (display "SERIALIZATION ERROR ")
+ (write `(str value ...))
+ (display " => ") (write re-str) (newline)
+ (set! total-errors (+ total-errors 1)))
+ (begin (display "OK ")
+ (write '(str value ...)) (newline))))))
+ (test-numbers rest ...)))
+ ((_ "no-totals") #f)
+ ((_ x rest ...)
+ (begin (newline) (display "-> ") (display x) (newline)
+ (display "-----------------------------------------------------")
+ (newline)
+ (test-numbers rest ...)))
+ ((_)
+ (if (= 0 total-errors)
+ (begin (newline)
+ (display "-----> Everything OK, no errors!")
+ (newline))
+ (begin (newline)
+ (display "-----> TOTAL ERRORS: ")
+ (display total-errors)
+ (newline)
+ (error total-errors))))))
+
+(test-numbers
+ "Simple integers"
+ ("1" 1)
+ ("+1" 1 "1")
+ ("-1" (- 1))
+ ("#i1" 1.0 "1.0" "1.")
+ ("#I1" 1.0 "1.0" "1.")
+ ("#i-1" (- 1.0) "-1.0" "-1.")
+ ("-#i1" #f)
+ ("+-1" #f)
+ ("" #f)
+ ("-" #f)
+ ("+" #f)
+ ("+-" #f)
+
+ "Basic decimal notation"
+ ("1.0" (exact->inexact 1) "1.")
+ ("1." 1.0 "1.0" "1.")
+ ("1.#" 1.0 1.5 "1.0" "1." "1.5")
+ (".1" 0.1 "0.1" "100.0e-3")
+ ("-.1" (- 0.1) "-0.1" "-100.0e-3")
+ ;; Some Schemes don't allow negative zero. This is okay with the standard
+ ("-.0" -0.0 "-0." "-0.0" "0.0" "0." ".0")
+ ("-0." -0.0 "-.0" "-0.0" "0.0" "0." ".0")
+ ("." #f)
+ (".1." #f)
+ ("..1" #f)
+ ("1.." #f)
+ ("#i1.0" 1.0 "1.0" "1.")
+ ("#e1.0" 1 "1")
+ ("#e-.0" 0 "0")
+ ("#e-0." 0 "0")
+ ("-#e.0" #f)
+
+ "Decimal notation with padding"
+ ("1#" 10.0 15.0 "10.0" "15.0" "10." "15.")
+ ("#e1#" 10 15 "10" "15")
+ ("#E1#" 10 15 "10" "15")
+ ("#1" #f)
+ ("#" #f)
+ ("1#2" #f)
+ ("1.#2" #f)
+ (".#" #f)
+ ("#.#" #f)
+ ("#.1" #f)
+ ("1#.2" #f)
+ ("1#." 10.0 15.0 "10.0" "15.0" "10." "15.")
+
+ "Decimal notation with suffix"
+ ("1e2" 100.0 "100.0" "100.")
+ ("1E2" 100.0 "100.0" "100.")
+ ("1s2" 100.0 "100.0" "100.")
+ ("1S2" 100.0 "100.0" "100.")
+ ("1f2" 100.0 "100.0" "100.")
+ ("1F2" 100.0 "100.0" "100.")
+ ("1d2" 100.0 "100.0" "100.")
+ ("1D2" 100.0 "100.0" "100.")
+ ("1l2" 100.0 "100.0" "100.")
+ ("1L2" 100.0 "100.0" "100.")
+ ("1e2e3" #f)
+ ("1e2s3" #f)
+ ("1e2.0" #f)
+
+ "Decimal notation with suffix and padding"
+ ("1#e2" 1000.0 1500.0 "1000.0" "1500.0" "1000." "1500." "1.0e3" "15.0e2")
+ ("1e2#" #f)
+
+ "NaN, Inf"
+ ("+nan.0" the-nan "+NaN.0")
+ ("+NAN.0" the-nan "+nan.0" "+NaN.0")
+ ("+nan.1" #f)
+ ("+nan.01" #f)
+ ("+inf.0" pos-inf "+Inf.0")
+ ("+InF.0" pos-inf "+inf.0" "+Inf.0")
+ ("-inf.0" neg-inf "-Inf.0")
+ ("-iNF.0" neg-inf "-inf.0" "-Inf.0")
+ ("+inf.01" #f)
+ ("+inf.1" #f)
+ ("-inf.01" #f)
+ ("-inf.1" #f)
+ ("+inf.0/1" #f)
+ ("1/+inf.0" #f)
+#|
+ ;; DEPRECATED (Disabled during deprecation period of "[+-]nan", "[+-]inf")
+ ("+nan" #f)
+ ("+inf" #f)
+ ("-inf" #f)
+ ("nan.0" #f)
+ ("inf.0" #f)
+|#
+
+ "Fractions"
+ ("1/2" (/ 1 2) "0.5" ".5" "500.0e-3")
+ ("10/2" 5.0 "5.0")
+ ("-1/2" (- (/ 1 2)) "-0.5" "-.5" "-500.0e-3")
+ ("1/-2" #f)
+ ("1.0/2" #f)
+ ("1/2.0" #f)
+ ("1/2e2" #f)
+ ("1/2e2" #f)
+ ("1#/2" 5.0 7.5 "5.0" "5." "7.5")
+ ("1/2#" 0.05 "0.05" ".05" "50.0e-3")
+ ("1#/#" #f)
+ ("1/" #f)
+ ("1/+" #f)
+ ("+/1" #f)
+ ("/1" #f)
+ ("/" #f)
+
+ "Some invalid complex numbers syntax (not supported at all)"
+ ("2i" #f)
+ ("+-i" #f)
+ ("i" #f)
+ ("1+2i1" #f)
+ ("1+2" #f)
+ ("1#+#i" #f)
+
+ "Base prefixes"
+ ("#x11" 17 "17")
+ ("#X11" 17 "17")
+ ("#d11" 11 "11")
+ ("#D11" 11 "11")
+ ("#o11" 9 "9")
+ ("#O11" 9 "9")
+ ("#b11" 3 "3")
+ ("#B11" 3 "3")
+ ("#da1" #f)
+ ("#o8" #f)
+ ("#b2" #f)
+ ("#o7" 7 "7")
+ ("#xa" 10 "10")
+ ("#xA" 10 "10")
+ ("#xf" 15 "15")
+ ("#xg" #f)
+ ("#x-10" -16 "-16")
+ ("#d-10" -10 "-10")
+ ("#o-10" -8 "-8")
+ ("#b-10" -2 "-2")
+ ("-#x10" #f)
+ ("-#d10" #f)
+ ("-#o10" #f)
+ ("-#b10" #f)
+ ("#x-" #f)
+ ("#x" #f)
+ ("#d" #f)
+ ("#d-" #f)
+ ("#d+" #f)
+ ("#o" #f)
+ ("#o-" #f)
+ ("#b" #f)
+ ("#b-" #f)
+ ("#e" #f)
+ ("#e-" #f)
+ ("#i" #f)
+ ("#i-" #f)
+
+ "Combination of prefixes"
+ ("#x#x11" #f)
+ ("#x#b11" #f)
+ ("#b#o11" #f)
+ ("#e#x10" 16 "16")
+ ("#i#x10" 16.0 "16.0" "16.")
+ ("#e#e10" #f)
+ ("#e#e#x10" #f)
+ ("#E#e#X10" #f)
+ ("#i#e#x10" #f)
+ ("#e#x#e10" #f)
+ ("#x#x#e10" #f)
+ ("#x#e#x10" #f)
+
+ "Base prefixes with padding"
+ ("#x1#0" #f)
+ ("#d1#0" #f)
+ ("#o1#0" #f)
+ ("#b1#0" #f)
+ ("#x1#" 16.0 24.0 "16.0" "24.0" "16." "24.")
+ ("#d1#" 10.0 15.0 "10.0" "15.0" "10." "15.")
+ ("#o1#" 8.0 12.0 "8.0" "12.0" "8." "12.")
+ ("#b1#" 2.0 3.0 "2.0" "3.0" "2." "3.")
+
+ "(Attempted) decimal notation with base prefixes"
+ ("#x1.0" #f)
+ ("#d1.0" 1.0 "1.0" "1.")
+ ("#o1.0" #f)
+ ("#b1.0" #f)
+ ("#x1.#" #f)
+ ("#d1.#" 1.0 1.5 "1.0" "1.5" "1.")
+ ("#o1.#" #f)
+ ("#b1.#" #f)
+ ("#x1." #f)
+ ("#d1." 1.0 "1.0" "1.")
+ ("#o1." #f)
+ ("#b1." #f)
+ ("#x.1" #f)
+ ("#d.1" 0.1 "0.1" ".1" "100.0e-3")
+ ("#o.1" #f)
+ ("#b.1" #f)
+ ("#x1e2" 482 "482")
+ ("#d1e2" 100.0 "100.0" "100.")
+ ("#o1e2" #f)
+ ("#b1e2" #f)
+
+ "Fractions with prefixes"
+ ("#x10/2" 8.0 "8.0" "8.")
+ ("#x11/2" 8.5 "8.5")
+ ("#d11/2" 5.5 "5.5")
+ ("#o11/2" 4.5 "4.5")
+ ("#b11/10" 1.5 "1.5")
+ ("#b11/2" #f)
+ ("#x10/#o10" #f)
+ ("10/#o10" #f)
+ ("#x1#/2" 8.0 12.0 "8.0" "8." "12.0" "12.")
+ ("#d1#/2" 5.0 7.5 "5.0" "5." "7.5")
+ ("#o1#/2" 4.0 6.0 "4.0" "4." "6.0" "6.")
+ ("#b1#/2" #f)
+ ("#b1#/10" 1.0 1.5 "1.0" "1." "1.5")
+ )
\ No newline at end of file
diff --git a/tests/runtests.sh b/tests/runtests.sh
index 4a24457a..c33ef051 100644
--- a/tests/runtests.sh
+++ b/tests/runtests.sh
@@ -284,6 +284,10 @@ echo "======================================== fixnum tests ..."
$compile fixnum-tests.scm
./a.out
+echo "======================================== string->number tests ..."
+$compile numbers-string-conversion-tests.scm
+./a.out
+
echo "======================================== srfi-4 tests ..."
$interpret -s srfi-4-tests.scm
Trap