~ chicken-core (chicken-5) 37f94e90aeb1f38cb0e856f4cebfec38a1d9672a


commit 37f94e90aeb1f38cb0e856f4cebfec38a1d9672a
Author:     Peter Bex <peter@more-magic.net>
AuthorDate: Sat Dec 5 16:28:59 2015 +0100
Commit:     Evan Hanson <evhan@foldling.org>
CommitDate: Sat Jan 16 19:55:59 2016 +1300

    Add simple statistical profiler to runtime library
    
    This enables collection of profiling data via statistical sampling to
    every program built with CHICKEN.  It relies on trace information for
    determining which procedure is running.  This also means it has a finer
    granularity than the default instrumentation-based profiler.  This can
    be an advantage or disadvantage depending on what you're trying to do.
    
    Signed-off-by: Evan Hanson <evhan@foldling.org>

diff --git a/chicken.h b/chicken.h
index 5f78ac6d..5a41f73f 100644
--- a/chicken.h
+++ b/chicken.h
@@ -1827,6 +1827,7 @@ C_fctexport C_cpsproc(C_dump_heap_state) C_noret;
 C_fctexport C_cpsproc(C_filter_heap_objects) C_noret;
 
 C_fctexport time_t C_fcall C_seconds(C_long *ms) C_regparm;
+C_fctexport C_word C_i_dump_statistical_profile();
 C_fctexport C_word C_a_i_list(C_word **a, int c, ...);
 C_fctexport C_word C_a_i_string(C_word **a, int c, ...);
 C_fctexport C_word C_a_i_record(C_word **a, int c, ...);
diff --git a/library.scm b/library.scm
index a07d3140..9fa47610 100644
--- a/library.scm
+++ b/library.scm
@@ -3946,6 +3946,8 @@ EOF
   (when (##sys#fudge 37)		; -:H given?
     (##sys#print "\n" #f ##sys#standard-error)
     (##sys#dump-heap-state))
+  (when (##sys#fudge 45)		; -:p or -:P given?
+    (##core#inline "C_i_dump_statistical_profile"))
   (let loop ()
     (let ((tasks ##sys#cleanup-tasks))
       (set! ##sys#cleanup-tasks '())
diff --git a/manual/Using the compiler b/manual/Using the compiler
index 39ec22c8..3ea2f784 100644
--- a/manual/Using the compiler	
+++ b/manual/Using the compiler	
@@ -137,7 +137,7 @@ the source text should be read from standard input.
 ; -prelude EXPRESSIONS : Add {{EXPRESSIONS}} before all other toplevel expressions in the compiled file.  This option may be given multiple times. Processing of this option takes place before processing of {{-prologue}}.
 
 ; -profile : 
-; -accumulate-profile : Instruments the source code to count procedure calls and execution times. After the program terminates (either via an explicit {{exit}} or implicitly), profiling statistics are written to a file named {{PROFILE.<randomnumber>}}. Each line of the generated file contains a list with the procedure name, the number of calls and the time spent executing it. Use the {{chicken-profile}} program to display the profiling information in a more user-friendly form. Enter {{chicken-profile -help}} at the command line to get a list of available options. The {{-accumulate-profile}} option is similar to {{-profile}}, but the resulting profile information will be appended to any existing {{PROFILE}} file. {{chicken-profile}} will merge and sum up the accumulated timing information, if several entries for the same procedure calls exist. Only profiling information for global procedures will be collected.
+; -accumulate-profile : Instruments the source code to count procedure calls and execution times. After the program terminates (either via an explicit {{exit}} or implicitly), profiling statistics are written to a file named {{PROFILE.<randomnumber>}}. Each line of the generated file contains a list with the procedure name, the number of calls and the time spent executing it. Use the {{chicken-profile}} program to display the profiling information in a more user-friendly form. Enter {{chicken-profile -help}} at the command line to get a list of available options. The {{-accumulate-profile}} option is similar to {{-profile}}, but the resulting profile information will be appended to any existing {{PROFILE}} file. {{chicken-profile}} will merge and sum up the accumulated timing information, if several entries for the same procedure calls exist. Only profiling information for global procedures will be collected.  See the {{-:p}} option under [[#runtime-options|"Runtime options"]] below for statistical profiling support.
 
 ; -profile-name FILENAME : Specifies name of the generated profile information (which defaults to {{PROFILE.<randomnumber>}}. Implies {{-profile}}.
 
@@ -224,6 +224,10 @@ compiler itself) accept a small set of runtime options:
 
 ; {{-:o}} : Disables detection of stack overflows at run-time.
 
+; {{-:p}} : Enable collection of statistics for profiling purposes and write to PROFILE.{{pid}} on exit.  This functions at a granularity defined by the trace information in the binary and libraries: each traced function will show up in the output.  See the {{-profile}} compiler option for instrumentation-based profiling.  The {{PROFILE.pid}} format is compatible with the format generated by instrumentation-based profiling.
+
+; {{-:Pfreq}} : Same as {{-:p}} but set the sampling frequency in microseconds (default is 10000 microseconds or every 10 milliseconds).
+
 ; {{-:r}} : Writes trace output to stderr. This option has no effect with in files compiled with the {{-no-trace}} options.
 
 ; {{-:sNUMBER}} : Specifies stack size.
diff --git a/runtime.c b/runtime.c
index 112314d3..a9cff72a 100644
--- a/runtime.c
+++ b/runtime.c
@@ -63,6 +63,15 @@
 # define EOVERFLOW  0
 #endif
 
+/* ITIMER_PROF is more precise, but Cygwin doesn't support it... */
+#ifdef __CYGWIN__
+# define C_PROFILE_SIGNAL SIGALRM
+# define C_PROFILE_TIMER  ITIMER_REAL
+#else
+# define C_PROFILE_SIGNAL SIGPROF
+# define C_PROFILE_TIMER  ITIMER_PROF
+#endif
+
 /* TODO: Include sys/select.h? Windows doesn't seem to have it... */
 #ifndef NO_POSIX_POLL
 #  include <poll.h>
@@ -154,6 +163,7 @@ static C_TLS int timezone;
 #define TEMPORARY_STACK_SIZE	       4096
 #define STRING_BUFFER_SIZE             4096
 #define DEFAULT_MUTATION_STACK_SIZE    1024
+#define PROFILE_TABLE_SIZE             1024
 
 #define MAX_PENDING_INTERRUPTS         100
 
@@ -302,6 +312,14 @@ typedef struct hdump_bucket_struct
   struct hdump_bucket_struct *next;
 } HDUMP_BUCKET;
 
+typedef struct profile_bucket_struct
+{
+  C_char *key;
+  C_uword sample_count; /* Multiplied by profile freq = time spent */
+  C_uword call_count;   /* Distinct calls seen while sampling */
+  struct profile_bucket_struct *next;
+} PROFILE_BUCKET;
+
 
 /* Variables: */
 
@@ -351,7 +369,9 @@ C_TLS C_uword
   C_heap_growth,
   C_heap_shrinkage;
 C_TLS C_uword C_maximal_heap_size;
-C_TLS time_t C_startup_time_seconds;
+C_TLS time_t
+  C_startup_time_seconds,
+  profile_frequency = 10000;
 C_TLS char 
   **C_main_argv,
   *C_dlerror;
@@ -424,7 +444,9 @@ static C_TLS int
   chicken_ran_once,
   pass_serious_signals = 1,
   callback_continuation_level;
-static volatile C_TLS int serious_signal_occurred = 0;
+static volatile C_TLS int
+  serious_signal_occurred = 0,
+  profiling = 0;
 static C_TLS unsigned int
   mutation_count,
   tracked_mutation_count,
@@ -459,6 +481,7 @@ static C_TLS FINALIZER_NODE
 static C_TLS void *current_module_handle;
 static C_TLS int flonum_print_precision = FLONUM_PRINT_PRECISION;
 static C_TLS HDUMP_BUCKET **hdump_table;
+static C_TLS PROFILE_BUCKET **profile_table = NULL;
 static C_TLS int 
   pending_interrupts[ MAX_PENDING_INTERRUPTS ],
   pending_interrupts_count,
@@ -491,6 +514,7 @@ static void C_fcall really_remark(C_word *x) C_regparm;
 static C_word C_fcall intern0(C_char *name) C_regparm;
 static void C_fcall update_locative_table(int mode) C_regparm;
 static LF_LIST *find_module_handle(C_char *name);
+static void take_profile_sample();
 
 static C_cpsproc(call_cc_wrapper) C_noret;
 static C_cpsproc(call_cc_values_wrapper) C_noret;
@@ -715,12 +739,15 @@ int CHICKEN_initialize(int heap, int stack, int symbols, void *toplevel)
   dlopen_flags = 0;
 #endif
 
-  /* setup signal handlers */
-  if(!pass_serious_signals) {
 #ifdef HAVE_SIGACTION
     sa.sa_flags = 0;
     sigfillset(&sa.sa_mask); /* See note in C_establish_signal_handler() */
     sa.sa_handler = global_signal_handler;
+#endif
+
+  /* setup signal handlers */
+  if(!pass_serious_signals) {
+#ifdef HAVE_SIGACTION
     C_sigaction(SIGBUS, &sa, NULL);
     C_sigaction(SIGFPE, &sa, NULL);
     C_sigaction(SIGILL, &sa, NULL);
@@ -759,6 +786,21 @@ int CHICKEN_initialize(int heap, int stack, int symbols, void *toplevel)
   callback_continuation_level = 0;
   gc_ms = 0;
   (void)C_randomize(C_fix(time(NULL)));
+
+  if (profiling) {
+#ifdef HAVE_SIGACTION
+    C_sigaction(C_PROFILE_SIGNAL, &sa, NULL);
+#else
+    C_signal(C_PROFILE_SIGNAL, global_signal_handler);
+#endif
+
+    profile_table = (PROFILE_BUCKET **)C_malloc(PROFILE_TABLE_SIZE * sizeof(PROFILE_BUCKET *));
+
+    if(profile_table == NULL)
+      panic(C_text("out of memory - can not allocate profile table"));
+
+    C_memset(profile_table, 0, sizeof(PROFILE_BUCKET *) * PROFILE_TABLE_SIZE);
+  }
   
   /* create k to invoke code for system-startup: */
   k0 = (C_SCHEME_BLOCK *)C_align((C_word)C_fromspace_top);
@@ -1081,7 +1123,10 @@ void global_signal_handler(int signum)
   }
 #endif
 
-  C_raise_interrupt(signal_mapping_table[ signum ]);
+  /* TODO: Make full use of sigaction: check that /our/ timer expired */
+  if (signum == C_PROFILE_SIGNAL && profiling) take_profile_sample();
+  else C_raise_interrupt(signal_mapping_table[ signum ]);
+
 #ifndef HAVE_SIGACTION
   /* not necessarily needed, but older UNIXen may not leave the handler installed: */
   C_signal(signum, global_signal_handler);
@@ -1246,6 +1291,8 @@ void CHICKEN_parse_command_line(int argc, char *argv[], C_word *heap, C_word *st
 		 " -:hsPERCENTAGE   set heap shrink percentage\n"
 		 " -:hSIZE          set fixed heap size\n"
 		 " -:r              write trace output to stderr\n"
+		 " -:p              collect statistical profile and write to file at exit\n"
+		 " -:PFREQ          like -:p, specifying sampling frequency in us (default: 10000)\n"
 		 " -:sSIZE          set nursery (stack) size\n"
 		 " -:tSIZE          set symbol-table size\n"
                  " -:fSIZE          set maximal number of pending finalizers\n"
@@ -1340,6 +1387,15 @@ void CHICKEN_parse_command_line(int argc, char *argv[], C_word *heap, C_word *st
 	  C_enable_gcweak = 1;
 	  break;
 
+	case 'P':
+	  profiling = 1;
+	  profile_frequency = arg_val(ptr);
+          goto next;
+
+	case 'p':
+	  profiling = 1;
+          break;
+
 	case 'r':
 	  show_trace = 1;
 	  break;
@@ -1408,6 +1464,18 @@ C_word CHICKEN_run(void *toplevel)
   chicken_is_running = chicken_ran_once = 1;
   return_to_host = 0;
 
+  if(profiling) {
+    struct itimerval itv;
+
+    itv.it_value.tv_sec = profile_frequency / 1000000;
+    itv.it_value.tv_usec = profile_frequency % 1000000;
+    itv.it_interval.tv_sec = itv.it_value.tv_sec;
+    itv.it_interval.tv_usec = itv.it_value.tv_usec;
+
+    if (setitimer(C_PROFILE_TIMER, &itv, NULL) == -1)
+      panic(C_text("error setting timer for profiling"));
+  }
+
 #if C_STACK_GROWS_DOWNWARD
   C_stack_limit = (C_word *)((C_byte *)C_stack_pointer - stack_size);
 #else
@@ -1436,6 +1504,18 @@ C_word CHICKEN_run(void *toplevel)
     ((C_proc)C_restart_trampoline)(C_restart_c, p);
   }
 
+  if(profiling) {
+    struct itimerval itv;
+
+    itv.it_value.tv_sec = 0;
+    itv.it_value.tv_usec = 0;
+    itv.it_interval.tv_sec = itv.it_value.tv_sec;
+    itv.it_interval.tv_usec = itv.it_value.tv_usec;
+
+    if (setitimer(C_PROFILE_TIMER, &itv, NULL) == -1)
+      panic(C_text("error clearing timer for profiling"));
+  }
+
   chicken_is_running = 0;
   return C_restore;
 }
@@ -3786,6 +3866,59 @@ C_regparm void *C_fcall C_retrieve2_symbol_proc(C_word val, char *name)
   return C_fast_retrieve_proc(val);
 }
 
+/* Bump profile count for current top of trace buffer */
+static void take_profile_sample()
+{
+  PROFILE_BUCKET **bp, *b;
+  C_char *key;
+  TRACE_INFO *tb;
+  /* To count distinct calls of a procedure, remember last call */
+  static C_char *prev_key = NULL;
+  static TRACE_INFO *prev_tb = NULL;
+
+  /* trace_buffer_top points *beyond* the topmost entry: Go back one */
+  if (trace_buffer_top == trace_buffer) {
+    if (!trace_buffer_full) return; /* No data yet */
+    tb = trace_buffer_limit - 1;
+  } else {
+    tb = trace_buffer_top - 1;
+  }
+
+  key = tb->raw;
+  if (key == NULL) return; /* May happen while in C_trace() */
+
+  /* We could also just hash the pointer but that's a bit trickier */
+  bp = profile_table + hash_string(C_strlen(key), key, PROFILE_TABLE_SIZE, 0, 0);
+  b = *bp;
+
+  /* First try to find pre-existing item in hash table */
+  while(b != NULL) {
+    if(b->key == key) {
+      b->sample_count++;
+      if (prev_key != key && prev_tb != tb)
+        b->call_count++;
+      goto done;
+    }
+    else b = b->next;
+  }
+
+  /* Not found, allocate a new item and use it as bucket's new head */
+  b = (PROFILE_BUCKET *)C_malloc(sizeof(PROFILE_BUCKET));
+
+  if(b == NULL)
+    panic(C_text("out of memory - cannot allocate profile table-bucket"));
+
+  b->next = *bp;
+  b->key = key;
+  *bp = b;
+  b->sample_count = 1;
+  b->call_count = 1;
+
+done:
+  prev_tb = tb;
+  prev_key = key;
+}
+
 
 C_regparm void C_fcall C_trace(C_char *name)
 {
@@ -3869,7 +4002,9 @@ C_char *C_dump_trace(int start)
 
 C_regparm void C_fcall C_clear_trace_buffer(void)
 {
-  int i;
+  int i, old_profiling = profiling;
+
+  profiling = 0;
 
   if(trace_buffer == NULL) {
     if(C_trace_buffer_size < MIN_TRACE_BUFFER_SIZE)
@@ -3890,15 +4025,19 @@ C_regparm void C_fcall C_clear_trace_buffer(void)
     trace_buffer[ i ].cooked2 = C_SCHEME_FALSE;
     trace_buffer[ i ].thread = C_SCHEME_FALSE;
   }
+
+  profiling = old_profiling;
 }
 
 C_word C_resize_trace_buffer(C_word size) {
-  int old_size = C_trace_buffer_size;
+  int old_size = C_trace_buffer_size, old_profiling = profiling;
   assert(trace_buffer);
+  profiling = 0;
   free(trace_buffer);
   trace_buffer = NULL;
   C_trace_buffer_size = C_unfix(size);
   C_clear_trace_buffer();
+  profiling = old_profiling;
   return(C_fix(old_size));
 }
 
@@ -4417,6 +4556,9 @@ C_regparm C_word C_fcall C_fudge(C_word fudge_factor)
   case C_fix(44):  /* whether debugger is active */
     return C_mk_bool(C_debugging);
 
+  case C_fix(45):  /* Whether we're currently profiling */
+    return C_mk_bool(profiling);
+
   default: return C_SCHEME_UNDEFINED;
   }
 }
@@ -9268,6 +9410,65 @@ C_i_get_keyword(C_word kw, C_word args, C_word def)
   return def;
 }
 
+C_word C_i_dump_statistical_profile()
+{
+  PROFILE_BUCKET *b, *b2, **bp;
+  FILE *fp;
+  C_char *k1, *k2 = NULL;
+  int n;
+  double ms;
+  struct itimerval itv;
+
+  assert(profiling);
+  assert(profile_table != NULL);
+
+  itv.it_value.tv_sec = 0;
+  itv.it_value.tv_usec = 0;
+  itv.it_interval.tv_sec = itv.it_value.tv_sec;
+  itv.it_interval.tv_usec = itv.it_value.tv_usec;
+
+  if (setitimer(C_PROFILE_TIMER, &itv, NULL) == -1)
+    panic(C_text("error clearing timer for profiling"));
+
+  profiling = 0; /* In case a SIGPROF is delivered late */
+  bp = profile_table;
+
+  C_snprintf(buffer, STRING_BUFFER_SIZE, C_text("PROFILE.%d"), C_getpid());
+
+  if(debug_mode)
+    C_dbg(C_text("debug"), C_text("dumping statistical profile to `%s'...\n"), buffer);
+
+  fp = C_fopen(buffer, "w");
+  if (fp == NULL)
+    panic(C_text("could not write profile!"));
+
+  for(n = 0; n < PROFILE_TABLE_SIZE; ++n) {
+    for(b = bp[ n ]; b != NULL; b = b2) {
+      b2 = b->next;
+
+      k1 = b->key;
+      C_fputs(C_text("(|"), fp);
+      /* Dump raw C string as if it were a symbol */
+      while((k2 = C_strpbrk(k1, C_text("\\|"))) != NULL) {
+        C_fwrite(k1, 1, k2-k1, fp);
+        C_fputc('\\', fp);
+        C_fputc(*k2, fp);
+        k1 = k2+1;
+      }
+      C_fputs(k1, fp);
+      ms = (double)b->sample_count * (double)profile_frequency / 1000.0;
+      C_fprintf(fp, C_text("| " UWORD_COUNT_FORMAT_STRING " %lf)\n"),
+                b->call_count, ms);
+      C_free(b);
+    }
+  }
+
+  C_fclose(fp);
+  C_free(profile_table);
+  profile_table = NULL;
+
+  return C_SCHEME_UNDEFINED;
+}
 
 void C_ccall C_dump_heap_state(C_word c, C_word *av)
 {
diff --git a/support.scm b/support.scm
index 888933d3..28437ff6 100644
--- a/support.scm
+++ b/support.scm
@@ -168,7 +168,7 @@
 	((string? x) (string->symbol x))
 	(else (string->symbol (sprintf "~a" x))) ) )
 
-(define (backslashify s) (string-translate (->string s) "\\" "\\\\"))
+(define (backslashify s) (string-translate* (->string s) '(("\\" . "\\\\"))))
 
 (define (uncommentify s) (string-translate* (->string s) '(("*/" . "*_/"))))
   
Trap