~ chicken-core (chicken-5) d9f2ad87b42ff5f545e70248a02f92c4a34e8267


commit d9f2ad87b42ff5f545e70248a02f92c4a34e8267
Author:     Peter Bex <peter.bex@xs4all.nl>
AuthorDate: Sat Aug 18 21:26:50 2012 +0200
Commit:     felix <felix@call-with-current-continuation.org>
CommitDate: Mon Aug 20 19:34:56 2012 +0200

    Add embedded NUL byte checks to all(?) C functions that accept strings and are called directly instead of through the FFI with 'c-string' or via the ##sys#make-c-string procedure
    
    Signed-off-by: felix <felix@call-with-current-continuation.org>

diff --git a/NEWS b/NEWS
index 01a3bf99..a8ce84eb 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,8 @@
   - on 64-bit machines the "random" procedure no longer truncates result
     values (which caused very nonrandom results for very large values).
     Note that random shouldn't be used for security-critical code.
+  - Added checks for embedded '\0' characters in strings passed to some
+    C functions on a lower level than Chicken's FFI.
 
 - Build system
   - version information has been moved into a separate unit to make the
diff --git a/chicken.h b/chicken.h
index f5ce0d94..f82c708d 100644
--- a/chicken.h
+++ b/chicken.h
@@ -572,6 +572,7 @@ static inline int isinf_ld (long double x)
 #define C_BAD_ARGUMENT_TYPE_NO_INPUT_PORT_ERROR       40
 #define C_BAD_ARGUMENT_TYPE_NO_OUTPUT_PORT_ERROR      41
 #define C_PORT_CLOSED_ERROR                           42
+#define C_ASCIIZ_REPRESENTATION_ERROR                 43
 
 
 /* Platform information */
diff --git a/library.scm b/library.scm
index ebf7ff45..76eb046f 100644
--- a/library.scm
+++ b/library.scm
@@ -4134,6 +4134,7 @@ EOF
 	((40) (apply ##sys#signal-hook #:type-error loc "bad argument type - not an input-port" args))
 	((41) (apply ##sys#signal-hook #:type-error loc "bad argument type - not an output-port" args))
 	((42) (apply ##sys#signal-hook #:file-error loc "port already closed" args))
+	((43) (apply ##sys#signal-hook #:type-error loc "cannot represent string with NUL bytes as C string" args))
 	(else (apply ##sys#signal-hook #:runtime-error loc "unknown internal error" args)) ) ) ) )
 
 
@@ -4177,9 +4178,8 @@ EOF
     (##core#inline "C_setsubchar" buf len #\nul)
     (if (fx= (##core#inline "C_asciiz_strlen" buf) len)
         buf
-        (##sys#signal-hook #:type-error loc
-                           "cannot represent string with NUL bytes as C string"
-                           str))) )
+        (##sys#error-hook (foreign-value "C_ASCIIZ_REPRESENTATION_ERROR" int)
+                          loc str))) )
 
 (define ##sys#peek-signed-integer (##core#primitive "C_peek_signed_integer"))
 (define ##sys#peek-unsigned-integer (##core#primitive "C_peek_unsigned_integer"))
diff --git a/posixunix.scm b/posixunix.scm
index a134904c..c851319f 100644
--- a/posixunix.scm
+++ b/posixunix.scm
@@ -224,6 +224,7 @@ static void C_fcall C_set_arg_string(char **where, int i, char *a, int len) {
     ptr = (char *)C_malloc(len + 1);
     C_memcpy(ptr, a, len);
     ptr[ len ] = '\0';
+    /* Can't barf() here, so the NUL byte check happens in Scheme */
   }
   else ptr = NULL;
   where[ i ] = ptr;
@@ -1765,9 +1766,12 @@ EOF
             [else pid] ) ) ) ) )
 
 (define process-execute
-  (let ([setarg (foreign-lambda void "C_set_exec_arg" int scheme-pointer int)]
+  ;; NOTE: We use c-string here instead of scheme-object.
+  ;; Because set_exec_* make a copy, this implies a double copy.
+  ;; At least it's secure, we can worry about performance later, if at all
+  (let ([setarg (foreign-lambda void "C_set_exec_arg" int c-string int)]
         [freeargs (foreign-lambda void "C_free_exec_args")]
-        [setenv (foreign-lambda void "C_set_exec_env" int scheme-pointer int)]
+        [setenv (foreign-lambda void "C_set_exec_env" int c-string int)]
         [freeenv (foreign-lambda void "C_free_exec_env")]
         [pathname-strip-directory pathname-strip-directory] )
     (lambda (filename #!optional (arglist '()) envlist)
diff --git a/posixwin.scm b/posixwin.scm
index 88a4a4f6..5b5d0b70 100644
--- a/posixwin.scm
+++ b/posixwin.scm
@@ -262,6 +262,7 @@ C_set_arg_string(char **where, int i, char *dat, int len)
 	ptr = (char *)C_malloc(len + 1);
 	C_memcpy(ptr, dat, len);
 	ptr[ len ] = '\0';
+        /* Can't barf() here, so the NUL byte check happens in Scheme */
     }
     else
 	ptr = NULL;
@@ -830,6 +831,7 @@ C_process(const char * app, const char * cmdlin, const char ** env,
 		pb += strlen(*p) + 1;
 	    }
 	    *pb = '\0';
+            /* This _should_ already have been checked for embedded NUL bytes */
 	}
 	else
 	    success = FALSE;
@@ -1504,8 +1506,11 @@ EOF
 		    olst)) ) ) ) ) ) ) )
 
 (define $exec-setup
-  (let ([setarg (foreign-lambda void "C_set_exec_arg" int scheme-pointer int)]
-	[setenv (foreign-lambda void "C_set_exec_env" int scheme-pointer int)]
+  ;; NOTE: We use c-string here instead of scheme-object.
+  ;; Because set_exec_* make a copy, this implies a double copy.
+  ;; At least it's secure, we can worry about performance later, if at all
+  (let ([setarg (foreign-lambda void "C_set_exec_arg" int c-string int)]
+	[setenv (foreign-lambda void "C_set_exec_env" int c-string int)]
 	[build-exec-argvec
 	  (lambda (loc lst argvec-setter idx)
 	    (if lst
@@ -1584,6 +1589,7 @@ EOF
 ; where stdin-input-port?, etc. is a port or #f, indicating no port created.
 
 (define ##sys#process
+  ;; XXX TODO: When environment is implemented, check for embedded NUL bytes!
   (let ([c-process
 	  (foreign-lambda bool "C_process" c-string c-string c-pointer
 	    (c-pointer int) (c-pointer int) (c-pointer int) (c-pointer int) int)])
diff --git a/runtime.c b/runtime.c
index caf0e11f..2673d815 100644
--- a/runtime.c
+++ b/runtime.c
@@ -1645,6 +1645,11 @@ void barf(int code, char *loc, ...)
     msg = C_text("port already closed");
     c = 1;
     break;
+ 
+  case C_ASCIIZ_REPRESENTATION_ERROR:
+    msg = C_text("cannot represent string with NUL bytes as C string");
+    c = 1;
+    break;
 
   default: panic(C_text("illegal internal error code"));
   }
@@ -3791,6 +3796,7 @@ C_word C_halt(C_word msg)
 	n = sizeof(buffer) - 1;
       C_strncpy(buffer, (C_char *)C_data_pointer(msg), n);
       buffer[ n ] = '\0';
+      /* XXX msg isn't checked for NUL bytes, but we can't barf here either! */
     }
     else C_strcpy(buffer, C_text("(aborted)"));
 
@@ -3819,12 +3825,18 @@ C_word C_halt(C_word msg)
 
 C_word C_message(C_word msg)
 {
+  unsigned int n = C_header_size(msg);
+  /*
+   * Strictly speaking this isn't necessary for the non-gui-mode,
+   * but let's try and keep this consistent across modes.
+   */
+  if (memchr(C_c_string(msg), '\0', n) != NULL)
+    barf(C_ASCIIZ_REPRESENTATION_ERROR, "##sys#message", msg);
+
   if(C_gui_mode) {
-    int n = C_header_size(msg);
-    
     if (n >= sizeof(buffer))
       n = sizeof(buffer) - 1;
-    C_strncpy(buffer, (C_char *)((C_SCHEME_BLOCK *)msg)->data, n);
+    C_strncpy(buffer, C_c_string(msg), n);
     buffer[ n ] = '\0';
 #if defined(_WIN32) && !defined(__CYGWIN__)
     MessageBox(NULL, buffer, C_text("CHICKEN runtime"), MB_OK | MB_ICONEXCLAMATION);
@@ -3832,7 +3844,7 @@ C_word C_message(C_word msg)
 #endif
   } /* fall through */
 
-  C_fwrite(((C_SCHEME_BLOCK *)msg)->data, C_header_size(msg), sizeof(C_char), stdout);
+  C_fwrite(C_c_string(msg), n, sizeof(C_char), stdout);
   C_putchar('\n');
   return C_SCHEME_UNDEFINED;
 }
@@ -4007,6 +4019,8 @@ C_regparm C_word C_fcall C_execute_shell_command(C_word string)
 
   C_memcpy(buf, ((C_SCHEME_BLOCK *)string)->data, n);
   buf[ n ] = '\0';
+  if (n != strlen(buf))
+    barf(C_ASCIIZ_REPRESENTATION_ERROR, "get-environment-variable", string);
 
   n = C_system(buf);
 
@@ -7011,10 +7025,14 @@ void C_ccall C_open_file_port(C_word c, C_word closure, C_word k, C_word port, C
 
     C_strncpy(buf, C_c_string(channel), n);
     buf[ n ] = '\0';
+    if (n != strlen(buf))
+      barf(C_ASCIIZ_REPRESENTATION_ERROR, "open", channel);
     n = C_header_size(mode);
     if (n >= sizeof(fmode)) n = sizeof(fmode) - 1;
     C_strncpy(fmode, C_c_string(mode), n);
     fmode[ n ] = '\0';
+    if (n != strlen(fmode)) /* Shouldn't happen, but never hurts */
+      barf(C_ASCIIZ_REPRESENTATION_ERROR, "open", mode);
     fp = C_fopen(buf, fmode);
 
     if(buf != buffer) C_free(buf);
@@ -7269,6 +7287,8 @@ C_a_i_string_to_number(C_word **a, int c, C_word str, C_word radix0)
 
   C_memcpy(sptr = buffer, C_c_string(str), n > (STRING_BUFFER_SIZE - 1) ? STRING_BUFFER_SIZE : n);
   buffer[ n ] = '\0';
+  if (n != strlen(buffer)) /* Don't barf; this is simply invalid number syntax */
+    goto fail;
   
   while(*sptr == '#') {
     switch(C_tolower((int)*(++sptr))) {
@@ -7829,13 +7849,15 @@ void C_ccall C_get_environment_variable(C_word c, C_word closure, C_word k, C_wo
   if(c != 3) C_bad_argc(c, 3);
 
   if(C_immediatep(name) || C_header_bits(name) != C_STRING_TYPE)
-    barf(C_BAD_ARGUMENT_TYPE_ERROR, "getenv", name);
+    barf(C_BAD_ARGUMENT_TYPE_ERROR, "get-environment-variable", name);
   
   if((len = C_header_size(name)) >= STRING_BUFFER_SIZE)
     C_kontinue(k, C_SCHEME_FALSE);
 
   strncpy(buffer, C_c_string(name), len);
   buffer[ len ] = '\0';
+  if (len != strlen(buffer))
+    barf(C_ASCIIZ_REPRESENTATION_ERROR, "get-environment-variable", name);
 
   if((save_string = C_do_getenv(buffer)) == NULL)
     C_kontinue(k, C_SCHEME_FALSE);
diff --git a/tests/library-tests.scm b/tests/library-tests.scm
index 4141c6f4..c986ef16 100644
--- a/tests/library-tests.scm
+++ b/tests/library-tests.scm
@@ -411,3 +411,7 @@
 (assert (= 1 (eval 1)))
 (assert (eq? '() (receive (eval '(values)))))
 (assert (equal? '(1 2 3) (receive (eval '(values 1 2 3)))))
+
+;;; message checks for invalid strings
+
+(assert-fail (##sys#message "123\x00456"))
\ No newline at end of file
diff --git a/tests/numbers-string-conversion-tests.scm b/tests/numbers-string-conversion-tests.scm
index 505132a5..e02341d9 100644
--- a/tests/numbers-string-conversion-tests.scm
+++ b/tests/numbers-string-conversion-tests.scm
@@ -111,6 +111,7 @@
  ("#i1" 1.0 "1.0" "1.")
  ("#I1" 1.0 "1.0" "1.")
  ("#i-1" (- 1.0) "-1.0" "-1.")
+ ("123\x00456" #f)
  ("-#i1" #f)
  ("+-1" #f)
  ("" #f)
diff --git a/tests/port-tests.scm b/tests/port-tests.scm
index f162edb7..df11e712 100644
--- a/tests/port-tests.scm
+++ b/tests/port-tests.scm
@@ -1,5 +1,10 @@
 (require-extension srfi-1 ports utils srfi-4 extras tcp posix)
 
+(define-syntax assert-error
+  (syntax-rules ()
+    ((_ expr) 
+     (assert (handle-exceptions _ #t expr #f)))))
+
 (define *text* #<<EOF
 this is a test
 <foof> #;33> (let ((in (open-input-string ""))) (close-input-port in)
@@ -195,3 +200,6 @@ EOF
     (check (read-string 10 in))
     (check "read-string!" (let ((buf (make-string 10)))
                             (read-string! 10 buf in) buf))))
+
+(print "\nEmbedded NUL bytes in filenames are rejected\n")
+(assert-error (with-output-to-file "embedded\x00null-byte" void))
\ No newline at end of file
diff --git a/tests/posix-tests.scm b/tests/posix-tests.scm
index e069a5af..0d58bac1 100644
--- a/tests/posix-tests.scm
+++ b/tests/posix-tests.scm
@@ -1,5 +1,10 @@
 (use files posix)
 
+(define-syntax assert-error
+  (syntax-rules ()
+    ((_ expr) 
+     (assert (handle-exceptions _ #t expr #f)))))
+
 (define-constant SOME-POS 123456)
 
 (let ((tnpfilpn (create-temporary-file)))
@@ -16,3 +21,13 @@
       (assert (= SOME-POS (file-position port)))
       (close-output-port port)
       (delete-file* tnpfilpn) ) ) )
+
+(assert-error (get-environment-variable "with\x00embedded-NUL"))
+(assert-error (setenv "with\x00embedded-NUL" "blabla"))
+(assert-error (setenv "blabla" "with\x00embedded-NUL"))
+(assert-error (system "echo this is \x00 not okay"))
+;; Use "false" to signal to the calling script that there was an error,
+;; even if the process will get called
+(assert-error (process-execute "false\x00123"))
+(assert-error (process-execute "false" '("1" "123\x00456")))
+(assert-error (process-execute "false" '("123\x00456") '("foo\x00bar" "blabla") ("lalala" "qux\x00mooh")))
Trap