139f7e9cdba897bc0969e761aede66218fcabb11

~ chicken-core (master) 139f7e9cdba897bc0969e761aede66218fcabb11
commit 139f7e9cdba897bc0969e761aede66218fcabb11
Author:     Peter Bex <peter.bex@xs4all.nl>
AuthorDate: Tue Sep 13 18:36:01 2011 +0200
Commit:     Peter Bex <peter.bex@xs4all.nl>
CommitDate: Tue Sep 13 18:36:01 2011 +0200

    Add torture test for numerical syntax and fix all failing testcases.
    
    Invalid syntax that used to be OK, like "+inf" and "nan", is currently
    still accepted but marked deprecated and the tests commented out.
    
    In the next release

diff --git a/NEWS b/NEWS
index 3fd1332c..450c3c64 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,10 @@
 - Runtime system
   - fixed handling of "inf" and nan" floating-point predicates for Solaris
     (thanks to Claude Marinier)
+  - deprecated "[+-]nan", "[+-]inf" and other notations "accidentally"
+    accepted by Chicken due to the underlying C library's strtod() function,
+    standardizing on "[+-]nan.0" and "[+-]inf.0" from R6RS (and soon R7RS),
+    when displaying numbers only these forms are generated now.
   - support for re-loading of compiled files has now been completely 
     removed
 
diff --git a/runtime.c b/runtime.c
index 399713eb..5f8a0caa 100644
--- a/runtime.c
+++ b/runtime.c
@@ -7196,9 +7196,9 @@ void C_ccall C_quotient(C_word c, C_word closure, C_word k, C_word n1, C_word n2
 C_regparm C_word C_fcall
 C_a_i_string_to_number(C_word **a, int c, C_word str, C_word radix0)
 {
-  int radix, radixpf = 0, sharpf = 0, ratp = 0, exactf, exactpf = 0, periodf = 0;
+  int radix, radixpf = 0, sharpf = 0, ratf = 0, exactf, exactpf = 0, periodf = 0, expf = 0;
   C_word n1, n;
-  C_char *sptr, *eptr;
+  C_char *sptr, *eptr, *rptr;
   double fn1, fn;
 
   if(radix0 & C_FIXNUM_BIT) radix = C_unfix(radix0);
@@ -7219,47 +7219,73 @@ C_a_i_string_to_number(C_word **a, int c, C_word str, C_word radix0)
   buffer[ n ] = '\0';
   
   while(*sptr == '#') {
-    switch(*(++sptr)) {
+    switch(C_tolower((int)*(++sptr))) {
     case 'b': if(radixpf) goto fail; else { radix = 2; radixpf = 1; } break;
     case 'o': if(radixpf) goto fail; else { radix = 8; radixpf = 1; } break;
     case 'd': if(radixpf) goto fail; else { radix = 10; radixpf = 1; } break;
     case 'x': if(radixpf) goto fail; else { radix = 16; radixpf = 1; } break;
     case 'e': if(exactpf) goto fail; else { exactf = 1; exactpf = 1; } break;
     case 'i': if(exactpf) goto fail; else { exactf = 0; exactpf = 1; } break;
-    default: --sptr;
+    default: goto fail;  /* Unknown prefix type */
     }
 
     ++sptr;
   }
-
-  /* check for embedded '#'s and double '.'s: */
-  for(eptr = sptr; *eptr != '\0'; ++eptr) {
-    switch(*eptr) {
+  
+  /* Scan for embedded special characters and do basic sanity checking: */
+  for(eptr = sptr, rptr = sptr; *eptr != '\0'; ++eptr) {
+    switch(C_tolower((int)*eptr)) {
     case '.': 
-      if(periodf) goto fail;
+      if(periodf || ratf || expf) goto fail;
       
       periodf = 1;
       break;
 
     case '#':
-      if(eptr[ 1 ] == '\0' || C_strchr("#.0123456789", eptr[ 1 ]) != NULL) {
-	sharpf = 1;
-	*eptr = '0';
-      }
-      else goto fail;
+      if (expf || (eptr == rptr) ||
+	  (!sharpf && (eptr == rptr+1) && (C_strchr("+-.", *rptr) != NULL)))
+        goto fail;
+      
+      sharpf = 1;
+      *eptr = '0';
+      
+      break;
+    case '/':
+      if(periodf || ratf || expf || eptr == sptr) goto fail;
       
+      sharpf = 0; /* Allow sharp signs in the denominator */
+      ratf = 1;
+      rptr = eptr+1;
+      break;
+    case 'e':
+    case 'd':
+    case 'f':
+    case 'l':
+    case 's':
+      /* Don't set exp flag if we see the "f" in "inf.0" (preceded by 'n') */
+      /* Other failure modes are handled elsewhere. */
+      if(radix == 10 && eptr > sptr && C_tolower((int)*(eptr-1)) != 'n') {
+        if (ratf) goto fail;
+	
+        expf = 1;
+	sharpf = 0;
+	*eptr = 'e'; /* strtod() normally only understands 'e', not dfls */
+      }
+      break;
+    default:
+      if(sharpf) goto fail;
       break;
     }
   }
-
+  if (eptr == rptr) goto fail; /* Disallow "empty" numbers like "#x" and "1/" */
+  
   /* check for rational representation: */
-  if((eptr = C_strchr(sptr, '/')) != NULL) {
-    if (eptr == sptr) {
-        n = C_SCHEME_FALSE;
-        goto fini;
+  if(rptr != sptr) {
+    if (*(rptr) == '-' || *(rptr) == '+') {
+      n = C_SCHEME_FALSE;
+      goto fini;
     }
-    *eptr = '\0';
-    ratp = 1;
+    *(rptr-1) = '\0';
 
     switch(convert_string_to_number(sptr, radix, &n1, &fn1)) {
     case 0:
@@ -7273,7 +7299,7 @@ C_a_i_string_to_number(C_word **a, int c, C_word str, C_word radix0)
       /* case 2: nop */
     }
 
-    sptr = eptr + 1;
+    sptr = rptr;
   }    
   
   /* convert number and return result: */
@@ -7283,8 +7309,8 @@ C_a_i_string_to_number(C_word **a, int c, C_word str, C_word radix0)
     break;
 
   case 1:			/* fixnum */
-    if(sharpf || ratp || (exactpf && !exactf)) {
-      n = C_flonum(a, ratp ? fn1 / (double)n : (double)n);
+    if(sharpf || ratf || (exactpf && !exactf)) {
+      n = C_flonum(a, ratf ? fn1 / (double)n : (double)n);
 
       if(exactpf && exactf) n = C_i_inexact_to_exact(n);
     }
@@ -7293,7 +7319,7 @@ C_a_i_string_to_number(C_word **a, int c, C_word str, C_word radix0)
     break;
 
   case 2:			/* flonum */
-    n = C_flonum(a, ratp ? fn1 / fn : fn);
+    n = C_flonum(a, ratf ? fn1 / fn : fn);
 
     if(exactpf && exactf) n = C_i_inexact_to_exact(n);
 
@@ -7354,28 +7380,35 @@ C_regparm C_word C_fcall convert_string_to_number(C_char *str, int radix, C_word
   C_word n;
   C_char *eptr, *eptr2;
   double fn;
-#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__OpenBSD__)
   int len = C_strlen(str);
 
-  if(len >= 4) {
-    if(!C_strncmp(str, "+nan.0", len)) {
-      *flo = 0.0/0.0;
-      return 2;
-    }
-    else if(!C_strncmp(str, "-nan.0", len)) {
-      *flo = -0.0/0.0;
-      return 2;
-    }
-    else if(!C_strncmp(str, "+inf.0", len)) {
-      *flo = 1.0/0.0;
-      return 2;
+  if(radix == 10) {
+    if (len >= 4 && len <= 6) { /* DEPRECATED, TODO: Change to (len == 4) */
+      if((*str == '+' || *str == '-') &&
+         C_strchr("inIN", *(str+1)) != NULL &&
+         C_strchr("naNA", *(str+2)) != NULL &&
+         C_strchr("fnFN", *(str+3)) != NULL &&
+         /* DEPRECATED, TODO: Rip out len checks */
+         (len == 4 || *(str+4) == '.') && (len == 5 || (*(str+5) == '0'))) {
+        if (*(str+1) == 'i' || *(str+1) == 'I')   /* Inf */
+          *flo = 1.0/0.0;
+        else                                      /* NaN */
+          *flo = 0.0/0.0;
+        if (*str == '-')
+          *flo *= -1.0;
+        return 2;
+      }
     }
-    else if(!C_strncmp(str, "-inf.0", len)) {
-      *flo = -1.0/0.0;
-      return 2;
+    /* DEPRECATED (enable in next release) */
+#if 0
+    /* This is disabled during the deprecation period of "+nan" syntax */
+    /* Prevent C parser from accepting things like "-inf" on its own... */
+    for(n = 0; n < len; ++n) {
+      if (C_strchr("+-0123456789e.", *(str+n)) == NULL)
+        return 0;
     }
-  }
 #endif
+  }
 
   if(C_strpbrk(str, "xX\0") != NULL) return 0;
 
@@ -7505,7 +7538,6 @@ void C_ccall C_number_to_string(C_word c, C_word closure, C_word k, C_word num,
       }
     } 
 
-#if defined(__CYGWIN__) || defined(__MINGW32__)
     if(C_isnan(f)) {
       C_strcpy(p = buffer, "+nan.0");
       goto fini;
@@ -7514,7 +7546,6 @@ void C_ccall C_number_to_string(C_word c, C_word closure, C_word k, C_word num,
       C_sprintf(p = buffer, "%cinf.0", f > 0 ? '+' : '-');
       goto fini;
     }
-#endif
 
 #ifdef HAVE_GCVT
     p = C_gcvt(f, flonum_print_precision, buffer); /* p unused, but we want to avoid stupid warnings */
diff --git a/tests/numbers-string-conversion-tests.scm b/tests/numbers-string-conversion-tests.scm
new file mode 100644
index 00000000..c6e1d3ca
--- /dev/null
+++ b/tests/numbers-string-conversion-tests.scm
@@ -0,0 +1,280 @@
+;;;
+;;; Numerical syntax "torture test"
+;;;
+;;; This tries to test a lot of edge cases in Scheme's numerical syntax.
+;;;
+;;; Output is written so that if you run it through "grep ERROR" it will
+;;; output nothing (and exit status will be nonzero) if there are no errors.
+;;; If you run it through "tail -n 1" you will just get the total error summary.
+;;;
+;;; This code assumes that string->number accepts numbers with embedded radix
+;;; specifiers (R5RS mentions that it's allowed to return #f in those cases).
+;;; It also doesn't try to support Schemes which support *only* integers or
+;;; *only* flonums (which is also allowed by R5RS).
+;;;
+
+(define the-nan (fp/ 0.0 0.0))
+(define pos-inf (fp/ 1.0 0.0))
+(define neg-inf (fp/ -1.0 0.0))
+
+(define (nan? x) (and (number? x) (not (= x x))))
+
+(define total-errors 0)
+
+;; Here comes a horrible nasty hack.  It seems to work though ;)
+(define-syntax test-numbers
+  (syntax-rules (compnums fractions)
+    ((_ (str value ...) rest ...)
+     (begin
+       (let ((res (string->number str)))
+         (if (not (or (and (not (string? value)) (equal? res value)) ...
+                      (and res (nan? res) (or (and value (nan? value)) ...))))
+             (begin (display       "PARSE ERROR         ")
+                    (write '(str value ...))
+                    (display " => ") (write res) (newline)
+                    (set! total-errors (+ total-errors 1)))
+             (let ((re-str (and res (number->string res))))
+               (if (not (or (and res (string=? re-str str))
+                            (and (not res) (not value)) ...
+                            (and res (string? value) (string=? re-str value)) ...))
+                   (begin (display "SERIALIZATION ERROR ")
+                          (write `(str value ...))
+                          (display " => ") (write re-str) (newline)
+                          (set! total-errors (+ total-errors 1)))
+                   (begin (display "OK                  ")
+                          (write '(str value ...)) (newline))))))
+       (test-numbers rest ...)))
+    ((_ "no-totals") #f)
+    ((_ x rest ...)
+     (begin (newline) (display "-> ") (display x) (newline)
+            (display "-----------------------------------------------------")
+            (newline)
+            (test-numbers rest ...)))
+    ((_)
+     (if (= 0 total-errors)
+         (begin (newline)
+                (display "-----> Everything OK, no errors!")
+                (newline))
+         (begin (newline)
+                (display "-----> TOTAL ERRORS: ")
+                (display total-errors)
+                (newline)
+                (error total-errors))))))
+
+(test-numbers
+ "Simple integers"
+ ("1" 1)
+ ("+1" 1 "1")
+ ("-1" (- 1))
+ ("#i1" 1.0 "1.0" "1.")
+ ("#I1" 1.0 "1.0" "1.")
+ ("#i-1" (- 1.0) "-1.0" "-1.")
+ ("-#i1" #f)
+ ("+-1" #f)
+ ("" #f)
+ ("-" #f)
+ ("+" #f)
+ ("+-" #f)
+
+ "Basic decimal notation"
+ ("1.0" (exact->inexact 1) "1.")
+ ("1." 1.0 "1.0" "1.")
+ ("1.#" 1.0 1.5 "1.0" "1." "1.5")
+ (".1" 0.1 "0.1" "100.0e-3")
+ ("-.1" (- 0.1) "-0.1" "-100.0e-3")
+ ;; Some Schemes don't allow negative zero. This is okay with the standard
+ ("-.0" -0.0 "-0." "-0.0" "0.0" "0." ".0")
+ ("-0." -0.0 "-.0" "-0.0" "0.0" "0." ".0")
+ ("." #f)
+ (".1." #f)
+ ("..1" #f)
+ ("1.." #f)
+ ("#i1.0" 1.0 "1.0" "1.")
+ ("#e1.0" 1 "1")
+ ("#e-.0" 0 "0")
+ ("#e-0." 0 "0")
+ ("-#e.0" #f)
+
+ "Decimal notation with padding"
+ ("1#" 10.0 15.0 "10.0" "15.0" "10." "15.")
+ ("#e1#" 10 15 "10" "15")
+ ("#E1#" 10 15 "10" "15")
+ ("#1" #f)
+ ("#" #f)
+ ("1#2" #f)
+ ("1.#2" #f)
+ (".#" #f)
+ ("#.#" #f)
+ ("#.1" #f)
+ ("1#.2" #f)
+ ("1#." 10.0 15.0 "10.0" "15.0" "10." "15.")
+
+ "Decimal notation with suffix"
+ ("1e2" 100.0 "100.0" "100.")
+ ("1E2" 100.0 "100.0" "100.")
+ ("1s2" 100.0 "100.0" "100.")
+ ("1S2" 100.0 "100.0" "100.")
+ ("1f2" 100.0 "100.0" "100.")
+ ("1F2" 100.0 "100.0" "100.")
+ ("1d2" 100.0 "100.0" "100.")
+ ("1D2" 100.0 "100.0" "100.")
+ ("1l2" 100.0 "100.0" "100.")
+ ("1L2" 100.0 "100.0" "100.")
+ ("1e2e3" #f)
+ ("1e2s3" #f)
+ ("1e2.0" #f)
+
+ "Decimal notation with suffix and padding"
+ ("1#e2" 1000.0 1500.0 "1000.0" "1500.0" "1000." "1500." "1.0e3" "15.0e2")
+ ("1e2#" #f)
+
+ "NaN, Inf"
+ ("+nan.0" the-nan "+NaN.0")
+ ("+NAN.0" the-nan "+nan.0" "+NaN.0")
+ ("+nan.1" #f)
+ ("+nan.01" #f)
+ ("+inf.0" pos-inf "+Inf.0")
+ ("+InF.0" pos-inf "+inf.0" "+Inf.0")
+ ("-inf.0" neg-inf "-Inf.0")
+ ("-iNF.0" neg-inf "-inf.0" "-Inf.0")
+ ("+inf.01" #f)
+ ("+inf.1" #f)
+ ("-inf.01" #f)
+ ("-inf.1" #f)
+ ("+inf.0/1" #f)
+ ("1/+inf.0" #f)
+#|
+ ;; DEPRECATED (Disabled during deprecation period of "[+-]nan", "[+-]inf")
+ ("+nan" #f)
+ ("+inf" #f)
+ ("-inf" #f)
+ ("nan.0" #f)
+ ("inf.0" #f)
+|#
+
+ "Fractions"
+ ("1/2" (/ 1 2) "0.5" ".5" "500.0e-3")
+ ("10/2" 5.0 "5.0")
+ ("-1/2" (- (/ 1 2)) "-0.5" "-.5" "-500.0e-3")
+ ("1/-2" #f)
+ ("1.0/2" #f)
+ ("1/2.0" #f)
+ ("1/2e2" #f)
+ ("1/2e2" #f)
+ ("1#/2" 5.0 7.5 "5.0" "5." "7.5")
+ ("1/2#" 0.05 "0.05" ".05" "50.0e-3")
+ ("1#/#" #f)
+ ("1/" #f)
+ ("1/+" #f)
+ ("+/1" #f)
+ ("/1" #f)
+ ("/" #f)
+
+ "Some invalid complex numbers syntax (not supported at all)"
+ ("2i" #f)
+ ("+-i" #f)
+ ("i" #f)
+ ("1+2i1" #f)
+ ("1+2" #f)
+ ("1#+#i" #f)
+
+ "Base prefixes"
+ ("#x11" 17 "17")
+ ("#X11" 17 "17")
+ ("#d11" 11 "11")
+ ("#D11" 11 "11")
+ ("#o11" 9 "9")
+ ("#O11" 9 "9")
+ ("#b11" 3 "3")
+ ("#B11" 3 "3")
+ ("#da1" #f)
+ ("#o8" #f)
+ ("#b2" #f)
+ ("#o7" 7 "7")
+ ("#xa" 10 "10")
+ ("#xA" 10 "10")
+ ("#xf" 15 "15")
+ ("#xg" #f)
+ ("#x-10" -16 "-16")
+ ("#d-10" -10 "-10")
+ ("#o-10" -8 "-8")
+ ("#b-10" -2 "-2")
+ ("-#x10" #f)
+ ("-#d10" #f)
+ ("-#o10" #f)
+ ("-#b10" #f)
+ ("#x-" #f)
+ ("#x" #f)
+ ("#d" #f)
+ ("#d-" #f)
+ ("#d+" #f)
+ ("#o" #f)
+ ("#o-" #f)
+ ("#b" #f)
+ ("#b-" #f)
+ ("#e" #f)
+ ("#e-" #f)
+ ("#i" #f)
+ ("#i-" #f)
+
+ "Combination of prefixes"
+ ("#x#x11" #f)
+ ("#x#b11" #f)
+ ("#b#o11" #f)
+ ("#e#x10" 16 "16")
+ ("#i#x10" 16.0 "16.0" "16.")
+ ("#e#e10" #f)
+ ("#e#e#x10" #f)
+ ("#E#e#X10" #f)
+ ("#i#e#x10" #f)
+ ("#e#x#e10" #f)
+ ("#x#x#e10" #f)
+ ("#x#e#x10" #f)
+
+ "Base prefixes with padding"
+ ("#x1#0" #f)
+ ("#d1#0" #f)
+ ("#o1#0" #f)
+ ("#b1#0" #f)
+ ("#x1#" 16.0 24.0 "16.0" "24.0" "16." "24.")
+ ("#d1#" 10.0 15.0 "10.0" "15.0" "10." "15.")
+ ("#o1#" 8.0 12.0 "8.0" "12.0" "8." "12.")
+ ("#b1#" 2.0 3.0 "2.0" "3.0" "2." "3.")
+
+ "(Attempted) decimal notation with base prefixes"
+ ("#x1.0" #f)
+ ("#d1.0" 1.0 "1.0" "1.")
+ ("#o1.0" #f)
+ ("#b1.0" #f)
+ ("#x1.#" #f)
+ ("#d1.#" 1.0 1.5 "1.0" "1.5" "1.")
+ ("#o1.#" #f)
+ ("#b1.#" #f)
+ ("#x1." #f)
+ ("#d1." 1.0 "1.0" "1.")
+ ("#o1." #f)
+ ("#b1." #f)
+ ("#x.1" #f)
+ ("#d.1" 0.1 "0.1" ".1" "100.0e-3")
+ ("#o.1" #f)
+ ("#b.1" #f)
+ ("#x1e2" 482 "482")
+ ("#d1e2" 100.0 "100.0" "100.")
+ ("#o1e2" #f)
+ ("#b1e2" #f)
+
+ "Fractions with prefixes"
+ ("#x10/2" 8.0 "8.0" "8.")
+ ("#x11/2" 8.5 "8.5")
+ ("#d11/2" 5.5 "5.5")
+ ("#o11/2" 4.5 "4.5")
+ ("#b11/10" 1.5 "1.5")
+ ("#b11/2" #f)
+ ("#x10/#o10" #f)
+ ("10/#o10" #f)
+ ("#x1#/2" 8.0 12.0 "8.0" "8." "12.0" "12.")
+ ("#d1#/2" 5.0 7.5 "5.0" "5." "7.5")
+ ("#o1#/2" 4.0 6.0 "4.0" "4." "6.0" "6.")
+ ("#b1#/2" #f)
+ ("#b1#/10" 1.0 1.5 "1.0" "1." "1.5")
+ )
\ No newline at end of file
diff --git a/tests/runtests.sh b/tests/runtests.sh
index 4a24457a..c33ef051 100644
--- a/tests/runtests.sh
+++ b/tests/runtests.sh
@@ -284,6 +284,10 @@ echo "======================================== fixnum tests ..."
 $compile fixnum-tests.scm
 ./a.out
 
+echo "======================================== string->number tests ..."
+$compile numbers-string-conversion-tests.scm
+./a.out
+
 echo "======================================== srfi-4 tests ..."
 $interpret -s srfi-4-tests.scm
Trap