From: hgn <hgodden00@gmail.com>
Date: Mon, 15 May 2023 16:12:15 +0000 (+0100)
Subject: maths api changes and random
X-Git-Url: https://harrygodden.com/git/?p=vg.git;a=commitdiff_plain;h=bafc8bc648b83f3b5f5567eedde1fc4533660e4f

maths api changes and random
---

diff --git a/projects/birds.c b/projects/birds.c
index 81646be..a07d818 100644
--- a/projects/birds.c
+++ b/projects/birds.c
@@ -5,6 +5,7 @@
 
 #define SYNTH_BIRD_STDLIB
 #include "vg_audio_synth_bird.h"
+#include "vg_m.h"
 
 #define WRAP1S( X ) (X)%44100
 
@@ -22,17 +23,6 @@
  *
  */
 
-static double rand_float( double min, double max )
-{
-   double r = (double)(rand()&(4096-1))*(1.0/4096.0);
-   return min + r*(max-min);
-}
-
-static int rand_seconds( double min, double max )
-{
-   return rand_float( min*44100.0, max*44100.0 );
-}
-
 static void vg_dsp_init( void );
 static void vg_dsp_process( float *stereo_in, float *stereo_out );
 
@@ -212,16 +202,10 @@ static struct dsp_delay __echos[8];
 static struct dsp_lpf   __echos_lpf[8];
 static struct dsp_schroeder __diffusion_chain[8];
 
-static inline float vg_lerpf( float a, float b, float t )
-{
-   return a + t*(b-a);
-}
-
 static void vg_dsp_init( void )
 {
    /* temporary global design */
 
-
    dsp_init_lpf( &__lpf_mud_free, 125.0f );
    dsp_init_lpf( &__hpf_mud_free, 500.0f );
 
@@ -234,7 +218,7 @@ static void vg_dsp_init( void )
    {
       float reflection_time = ((sizes[i])/343.0f) * 1000.0f;
 
-      float var   = 1.0f + rand_float(-1.0,1.0) * reflection_variance,
+      float var   = 1.0f + vg_randf64_range(-1.0,1.0) * reflection_variance,
             total = reflection_time * var;
 
       dsp_init_delay( &__echos[i], total / 1000.0f );
diff --git a/vg.h b/vg.h
index 6cbcb96..62651c0 100644
--- a/vg.h
+++ b/vg.h
@@ -862,6 +862,7 @@ VG_STATIC void _vg_terminate(void)
 
 VG_STATIC void vg_enter( int argc, char *argv[], const char *window_name )
 {
+   vg_rand_seed( 461 );
    _vg_process_launch_opts_internal( argc, argv );
 
    /* Systems init */
diff --git a/vg_audio_dsp.h b/vg_audio_dsp.h
index c973cc9..0eaeb54 100644
--- a/vg_audio_dsp.h
+++ b/vg_audio_dsp.h
@@ -167,7 +167,7 @@ static void vg_dsp_init( void )
    for( int i=0; i<8; i++ ){
       float reflection_time = ((sizes[i])/343.0f) * 1000.0f;
 
-      float var   = 1.0f + (vg_randf()*2.0f - 1.0f) * reflection_variance,
+      float var   = 1.0f + (vg_randf64()*2.0f - 1.0f) * reflection_variance,
             total = reflection_time * var;
 
       dsp_init_delay( &__echos[i], total / 1000.0f );
diff --git a/vg_m.h b/vg_m.h
index a8b2091..c07f389 100644
--- a/vg_m.h
+++ b/vg_m.h
@@ -1,4 +1,26 @@
-/* Copyright (C) 2021-2022 Harry Godden (hgn) - All Rights Reserved */
+/* Copyright (C) 2021-2023 Harry Godden (hgn) - All Rights Reserved 
+ *
+ *  0. Misc
+ *  1. Scalar operations
+ *  2. Vectors
+ *    2.a 2D Vectors
+ *    2.b 3D Vectors
+ *    2.c 4D Vectors
+ *  3. Quaternions
+ *  4. Matrices
+ *    4.a 2x2 matrices
+ *    4.b 3x3 matrices
+ *    4.c 4x3 matrices
+ *    4.d 4x4 matrices
+ *  5. Geometry
+ *    5.a Boxes
+ *    5.b Planes
+ *    5.c Closest points
+ *    5.d Raycast & Spherecasts
+ *    5.e Curves
+ *  6. Statistics
+ *    6.a Random numbers
+ **/
 
 #ifndef VG_M_H
 #define VG_M_H
@@ -9,91 +31,83 @@
 
 #define VG_PIf  3.14159265358979323846264338327950288f
 #define VG_TAUf 6.28318530717958647692528676655900576f
+/*
+ * -----------------------------------------------------------------------------
+ * Section 0.                    Misc Operations
+ * -----------------------------------------------------------------------------
+ */
 
-static u32 vg_ftu32( float a )
+/* get the f32 as the raw bits in a u32 without converting */
+static u32 vg_ftu32( f32 a )
 {
    u32 *ptr = (u32 *)(&a);
    return *ptr;
 }
 
-static int vg_isinff( float a )
+/* check if f32 is infinite */
+static int vg_isinff( f32 a )
 {
    return ((vg_ftu32(a)) & 0x7FFFFFFFU) == 0x7F800000U;
 }
 
-static int vg_isnanf( float a )
+/* check if f32 is not a number */
+static int vg_isnanf( f32 a )
 {
    return !vg_isinff(a) && ((vg_ftu32(a)) & 0x7F800000U) == 0x7F800000U;
 }
 
-static int vg_validf( float a )
+/* check if f32 is a number and is not infinite */
+static int vg_validf( f32 a )
 {
    return ((vg_ftu32(a)) & 0x7F800000U) != 0x7F800000U;
 }
 
-static inline float vg_minf( float a, float b )
-{
-   return a < b? a: b;
-}
+/*
+ * -----------------------------------------------------------------------------
+ * Section 1.                   Scalar Operations
+ * -----------------------------------------------------------------------------
+ */
 
-static inline float vg_maxf( float a, float b )
-{
-   return a > b? a: b;
-}
+static inline f32 vg_minf( f32 a, f32 b ){ if( a < b ) return a; return b; }
+static inline f32 vg_maxf( f32 a, f32 b ){ if( a > b ) return a; return b; }
+
+static inline int vg_min( int a, int b ){ if( a < b ) return a; return b; }
+static inline int vg_max( int a, int b ){ if( a > b ) return a; return b; }
 
-static inline float vg_clampf( float a, float min, float max )
+static inline f32 vg_clampf( f32 a, f32 min, f32 max )
 {
    return vg_minf( max, vg_maxf( a, min ) );
 }
 
-static inline float vg_signf( float a )
+static inline f32 vg_signf( f32 a )
 {
    return a < 0.0f? -1.0f: 1.0f;
 }
 
-static inline float vg_fractf( float a )
+static inline f32 vg_fractf( f32 a )
 {
    return a - floorf( a );
 }
 
-
-__attribute__ ((deprecated))
-static float stable_force( float current, float diff )
-{
-   float fnew = current + diff;
-
-   if( fnew * current < 0.0f )
-      return 0.0f;
-
-   return fnew;
-}
-
-static float vg_cfrictf( float current, float F )
-{
-   return -vg_signf(current) * vg_minf( F, fabsf(current) );
-}
-
-static inline int vg_min( int a, int b )
-{
-   return a < b? a: b;
-}
-
-static inline int vg_max( int a, int b )
+static f32 vg_cfrictf( f32 velocity, f32 F )
 {
-   return a > b? a: b;
+   return -vg_signf(velocity) * vg_minf( F, fabsf(velocity) );
 }
 
-static inline float vg_rad( float deg )
+static inline f32 vg_rad( f32 deg )
 {
    return deg * VG_PIf / 180.0f;
 }
 
 /*
- * Vector 3
+ * -----------------------------------------------------------------------------
+ * Section 2.a                   2D Vectors
+ * -----------------------------------------------------------------------------
  */
-static inline void v2_copy( v2f a, v2f b )
+
+static inline void v2_copy( v2f a, v2f d )
 {
-   b[0] = a[0]; b[1] = a[1];
+   d[0] = a[0]; d[1] = a[1];
 }
 
 static inline void v2_zero( v2f a )
@@ -101,22 +115,12 @@ static inline void v2_zero( v2f a )
    a[0] = 0.f; a[1] = 0.f;
 }
 
-static inline void v2i_copy( v2i a, v2i b )
-{
-   b[0] = a[0]; b[1] = a[1];
-}
-
-static inline int v2i_eq( v2i a, v2i b )
-{
-   return ((a[0] == b[0]) && (a[1] == b[1]));
-}
-
-static inline void v2i_add( v2i a, v2i b, v2i d )
+static inline void v2_add( v2f a, v2f b, v2f d )
 {
    d[0] = a[0]+b[0]; d[1] = a[1]+b[1];
 }
 
-static inline void v2i_sub( v2i a, v2i b, v2i d )
+static inline void v2_sub( v2f a, v2f b, v2f d )
 {
    d[0] = a[0]-b[0]; d[1] = a[1]-b[1];
 }
@@ -133,38 +137,28 @@ static inline void v2_maxv( v2f a, v2f b, v2f dest )
    dest[1] = vg_maxf(a[1], b[1]);
 }
 
-static inline void v2_sub( v2f a, v2f b, v2f d )
-{
-   d[0] = a[0]-b[0]; d[1] = a[1]-b[1];
-}
-
-static inline float v2_dot( v2f a, v2f b )
+static inline f32 v2_dot( v2f a, v2f b )
 {
    return a[0] * b[0] + a[1] * b[1];
 }
 
-static inline float v2_cross( v2f a, v2f b )
+static inline f32 v2_cross( v2f a, v2f b )
 {
    return a[0]*b[1] - a[1]*b[0];
 }
 
-static inline void v2_add( v2f a, v2f b, v2f d )
-{
-   d[0] = a[0]+b[0]; d[1] = a[1]+b[1];
-}
-
 static inline void v2_abs( v2f a, v2f d )
 {
    d[0] = fabsf( a[0] );
    d[1] = fabsf( a[1] );
 }
 
-static inline void v2_muls( v2f a, float s, v2f d )
+static inline void v2_muls( v2f a, f32 s, v2f d )
 {
    d[0] = a[0]*s; d[1] = a[1]*s;
 }
 
-static inline void v2_divs( v2f a, float s, v2f d )
+static inline void v2_divs( v2f a, f32 s, v2f d )
 {
    d[0] = a[0]/s; d[1] = a[1]/s;
 }
@@ -186,35 +180,35 @@ static inline void v2_muladd( v2f a, v2f b, v2f s, v2f d )
    d[1] = a[1]+b[1]*s[1];
 }
 
-static inline void v2_muladds( v2f a, v2f b, float s, v2f d )
+static inline void v2_muladds( v2f a, v2f b, f32 s, v2f d )
 {
    d[0] = a[0]+b[0]*s; 
    d[1] = a[1]+b[1]*s;
 }
 
-static inline float v2_length2( v2f a )
+static inline f32 v2_length2( v2f a )
 {
    return a[0]*a[0] + a[1]*a[1];
 }
 
-static inline float v2_length( v2f a )
+static inline f32 v2_length( v2f a )
 {
    return sqrtf( v2_length2( a ) );
 }
 
-static inline float v2_dist2( v2f a, v2f b )
+static inline f32 v2_dist2( v2f a, v2f b )
 {
    v2f delta;
    v2_sub( a, b, delta );
    return v2_length2( delta );
 }
 
-static inline float v2_dist( v2f a, v2f b )
+static inline f32 v2_dist( v2f a, v2f b )
 {
    return sqrtf( v2_dist2( a, b ) );
 }
 
-static inline void v2_lerp( v2f a, v2f b, float t, v2f d )
+static inline void v2_lerp( v2f a, v2f b, f32 t, v2f d )
 {
    d[0] = a[0] + t*(b[0]-a[0]);
    d[1] = a[1] + t*(b[1]-a[1]);
@@ -227,7 +221,7 @@ static inline void v2_normalize( v2f a )
 
 static void v2_normalize_clamp( v2f a )
 {
-   float l2 = v2_length2( a );
+   f32 l2 = v2_length2( a );
    if( l2 > 1.0f )
       v2_muls( a, 1.0f/sqrtf(l2), a );
 }
@@ -238,48 +232,65 @@ static inline void v2_floor( v2f a, v2f b )
    b[1] = floorf( a[1] );
 }
 
-static inline void v2_fill( v2f a, float v )
+static inline void v2_fill( v2f a, f32 v )
 {
    a[0] = v;
    a[1] = v;
 }
 
-/* copysign of b to a */
 static inline void v2_copysign( v2f a, v2f b )
 {
    a[0] = copysignf( a[0], b[0] );
    a[1] = copysignf( a[1], b[1] );
 }
 
-/*
- * Vector 3
- */
-static inline void v3_zero( v3f a )
+/* integer variants 
+ * ---------------- */
+
+static inline void v2i_copy( v2i a, v2i b )
 {
-   a[0] = 0.f; a[1] = 0.f; a[2] = 0.f;
+   b[0] = a[0]; b[1] = a[1];
+}
+
+static inline int v2i_eq( v2i a, v2i b )
+{
+   return !((a[0] != b[0]) || (a[1] != b[1])); /* 1 if both match */
+}
+
+static inline void v2i_add( v2i a, v2i b, v2i d )
+{
+   for( int i=0; i<2; i++ ) d[i] = a[i]+b[i]; /* d = a + b, per component */
+}
+
+static inline void v2i_sub( v2i a, v2i b, v2i d )
+{
+   d[0] = a[0]-b[0]; d[1] = a[1]-b[1];
 }
 
+/*
+ * -----------------------------------------------------------------------------
+ * Section 2.b                   3D Vectors
+ * -----------------------------------------------------------------------------
+ */
+
 static inline void v3_copy( v3f a, v3f b )
 {
    b[0] = a[0]; b[1] = a[1]; b[2] = a[2];
 }
 
-static inline void v3_add( v3f a, v3f b, v3f d )
+static inline void v3_zero( v3f a )
 {
-   d[0] = a[0]+b[0]; d[1] = a[1]+b[1]; d[2] = a[2]+b[2];
+   a[0] = 0.f; a[1] = 0.f; a[2] = 0.f;
 }
 
-static inline void v3i_add( v3i a, v3i b, v3i d )
+static inline void v3_add( v3f a, v3f b, v3f d )
 {
    d[0] = a[0]+b[0]; d[1] = a[1]+b[1]; d[2] = a[2]+b[2];
 }
 
-static inline void v4_add( v4f a, v4f b, v4f d )
+static inline void v3i_add( v3i a, v3i b, v3i d )
 {
-   d[0] = a[0]+b[0]; 
-   d[1] = a[1]+b[1];
-   d[2] = a[2]+b[2];
-   d[3] = a[3]+b[3];
+   d[0] = a[0]+b[0]; d[1] = a[1]+b[1]; d[2] = a[2]+b[2];
 }
 
 static inline void v3_sub( v3f a, v3f b, v3f d )
@@ -304,19 +315,19 @@ static inline void v3_div( v3f a, v3f b, v3f d )
    d[2] = b[2]!=0.0f? a[2]/b[2]: INFINITY;
 }
 
-static inline void v3_muls( v3f a, float s, v3f d )
+static inline void v3_muls( v3f a, f32 s, v3f d )
 {
    d[0] = a[0]*s; d[1] = a[1]*s; d[2] = a[2]*s;
 }
 
-static inline void v3_fill( v3f a, float v )
+static inline void v3_fill( v3f a, f32 v )
 {
    a[0] = v;
    a[1] = v;
    a[2] = v;
 }
 
-static inline void v3_divs( v3f a, float s, v3f d )
+static inline void v3_divs( v3f a, f32 s, v3f d )
 {
    if( s == 0.0f )
       v3_fill( d, INFINITY );
@@ -328,7 +339,7 @@ static inline void v3_divs( v3f a, float s, v3f d )
    }
 }
 
-static inline void v3_muladds( v3f a, v3f b, float s, v3f d )
+static inline void v3_muladds( v3f a, v3f b, f32 s, v3f d )
 {
    d[0] = a[0]+b[0]*s; d[1] = a[1]+b[1]*s; d[2] = a[2]+b[2]*s;
 }
@@ -340,7 +351,7 @@ static inline void v3_muladd( v2f a, v2f b, v2f s, v2f d )
    d[2] = a[2]+b[2]*s[2];
 }
 
-static inline float v3_dot( v3f a, v3f b )
+static inline f32 v3_dot( v3f a, v3f b )
 {
    return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
 }
@@ -354,24 +365,24 @@ static inline void v3_cross( v3f a, v3f b, v3f dest )
    v3_copy( d, dest );
 }
 
-static inline float v3_length2( v3f a )
+static inline f32 v3_length2( v3f a )
 {
    return v3_dot( a, a );
 }
 
-static inline float v3_length( v3f a )
+static inline f32 v3_length( v3f a )
 {
    return sqrtf( v3_length2( a ) );
 }
 
-static inline float v3_dist2( v3f a, v3f b )
+static inline f32 v3_dist2( v3f a, v3f b )
 {
    v3f delta;
    v3_sub( a, b, delta );
    return v3_length2( delta );
 }
 
-static inline float v3_dist( v3f a, v3f b )
+static inline f32 v3_dist( v3f a, v3f b )
 {
    return sqrtf( v3_dist2( a, b ) );
 }
@@ -381,25 +392,25 @@ static inline void v3_normalize( v3f a )
    v3_muls( a, 1.f / v3_length( a ), a );
 }
 
-static inline float vg_lerpf( float a, float b, float t )
+static inline f32 vg_lerpf( f32 a, f32 b, f32 t )
 {
    return a + t*(b-a);
 }
 
-static inline double vg_lerp( double a, double b, double t )
+static inline f64 vg_lerp( f64 a, f64 b, f64 t )
 {
    return a + t*(b-a);
 }
 
 /* correctly lerp around circular period -pi -> pi */
-static float vg_alerpf( float a, float b, float t )
+static f32 vg_alerpf( f32 a, f32 b, f32 t )
 {
-   float d = fmodf( b-a, VG_TAUf ),
+   f32 d = fmodf( b-a, VG_TAUf ),
          s = fmodf( 2.0f*d, VG_TAUf ) - d;
    return a + s*t;
 }
 
-static inline void v3_lerp( v3f a, v3f b, float t, v3f d )
+static inline void v3_lerp( v3f a, v3f b, f32 t, v3f d )
 {
    d[0] = a[0] + t*(b[0]-a[0]);
    d[1] = a[1] + t*(b[1]-a[1]);
@@ -420,12 +431,12 @@ static inline void v3_maxv( v3f a, v3f b, v3f dest )
    dest[2] = vg_maxf(a[2], b[2]);
 }
 
-static inline float v3_minf( v3f a )
+static inline f32 v3_minf( v3f a )
 {
    return vg_minf( vg_minf( a[0], a[1] ), a[2] );
 }
 
-static inline float v3_maxf( v3f a )
+static inline f32 v3_maxf( v3f a )
 {
    return vg_maxf( vg_maxf( a[0], a[1] ), a[2] );
 }
@@ -451,10 +462,10 @@ static inline void v3_negate( v3f a, v3f b )
    b[2] = -a[2];
 }
 
-static inline void v3_rotate( v3f v, float angle, v3f axis, v3f d ) 
+static inline void v3_rotate( v3f v, f32 angle, v3f axis, v3f d ) 
 {
   v3f v1, v2, k;
-  float c, s;
+  f32 c, s;
 
   c = cosf( angle );
   s = sinf( angle );
@@ -470,19 +481,30 @@ static inline void v3_rotate( v3f v, float angle, v3f axis, v3f d )
 }
 
 /*
- * Vector 4
+ * -----------------------------------------------------------------------------
+ * Section 2.c                   4D Vectors
+ * -----------------------------------------------------------------------------
  */
+
 static inline void v4_copy( v4f a, v4f b )
 {
    b[0] = a[0]; b[1] = a[1]; b[2] = a[2]; b[3] = a[3];
 }
 
+/* component-wise sum of two 4D vectors: d = a + b (d may alias a or b) */
+static inline void v4_add( v4f a, v4f b, v4f d )
+{
+   for( int i=0; i<4; i++ ){
+      d[i] = a[i]+b[i];
+   }
+}
+
 static inline void v4_zero( v4f a )
 {
    a[0] = 0.f; a[1] = 0.f; a[2] = 0.f; a[3] = 0.f;
 }
 
-static inline void v4_muls( v4f a, float s, v4f d )
+static inline void v4_muls( v4f a, f32 s, v4f d )
 {
    d[0] = a[0]*s; 
    d[1] = a[1]*s;
@@ -490,7 +512,7 @@ static inline void v4_muls( v4f a, float s, v4f d )
    d[3] = a[3]*s;
 }
 
-static inline void v4_muladds( v4f a, v4f b, float s, v4f d )
+static inline void v4_muladds( v4f a, v4f b, f32 s, v4f d )
 {
    d[0] = a[0]+b[0]*s; 
    d[1] = a[1]+b[1]*s;
@@ -498,7 +520,7 @@ static inline void v4_muladds( v4f a, v4f b, float s, v4f d )
    d[3] = a[3]+b[3]*s;
 }
 
-static inline void v4_lerp( v4f a, v4f b, float t, v4f d )
+static inline void v4_lerp( v4f a, v4f b, f32 t, v4f d )
 {
    d[0] = a[0] + t*(b[0]-a[0]);
    d[1] = a[1] + t*(b[1]-a[1]);
@@ -506,25 +528,123 @@ static inline void v4_lerp( v4f a, v4f b, float t, v4f d )
    d[3] = a[3] + t*(b[3]-a[3]);
 }
 
-static inline float v4_dot( v4f a, v4f b )
+static inline f32 v4_dot( v4f a, v4f b )
 {
    return a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
 }
 
-static inline float v4_length( v4f a )
+static inline f32 v4_length( v4f a )
 {
    return sqrtf( v4_dot(a,a) );
 }
 
 /*
- * Matrix 2x2
+ * -----------------------------------------------------------------------------
+ * Section 3                   Quaternions
+ * -----------------------------------------------------------------------------
+ */
+
+static inline void q_identity( v4f q )
+{
+   v4_copy( (v4f){ 0.0f, 0.0f, 0.0f, 1.0f }, q ); /* zero vector, scalar 1 */
+}
+
+/* write to q the quaternion for a turn of `angle` radians about `axis`.
+ * axis is used as passed in; it is not normalized here */
+static inline void q_axis_angle( v4f q, v3f axis, f32 angle )
+{
+   f32 half = angle*0.5f,
+       sn   = sinf( half ),
+       cs   = cosf( half );
+
+   v3_muls( axis, sn, q );
+   q[3] = cs;
+}
+
+/* quaternion product written to d; staged in locals so d may alias q or q1 */
+static inline void q_mul( v4f q, v4f q1, v4f d )
+{
+   f32 x = q[3]*q1[0] + q[0]*q1[3] + q[1]*q1[2] - q[2]*q1[1],
+       y = q[3]*q1[1] - q[0]*q1[2] + q[1]*q1[3] + q[2]*q1[0],
+       z = q[3]*q1[2] + q[0]*q1[1] - q[1]*q1[0] + q[2]*q1[3],
+       w = q[3]*q1[3] - q[0]*q1[0] - q[1]*q1[1] - q[2]*q1[2];
+   d[0] = x; d[1] = y; d[2] = z; d[3] = w;
+}
+
+/* scale q in place so its length becomes 1.
+ * a zero-length q is not guarded against (division by zero) */
+static inline void q_normalize( v4f q )
+{
+   f32 inv_len = 1.0f / v4_length( q );
+
+   v4_muls( q, inv_len, q );
+}
+
+/* d = conjugate of q divided by dot(q,q); d may alias q */
+static inline void q_inv( v4f q, v4f d )
+{
+   f32 inv_n2 = 1.0f / v4_dot(q,q);
+
+   v3_muls( q, -inv_n2, d );
+   d[3] = q[3]*inv_n2;
+}
+
+/* normalized lerp from a to b by t, written to d. b is negated first when
+ * a.b < 0; the target is staged in a local so d may alias a or b */
+static inline void q_nlerp( v4f a, v4f b, f32 t, v4f d )
+{
+   v4f target;
+
+   v4_muls( b, v4_dot(a,b) < 0.0f? -1.0f: 1.0f, target );
+   v4_lerp( a, target, t, d );
+
+   q_normalize( d );
+}
+
+/* rotation matrix for quaternion q (vector part q[0..2], scalar q[3]) -> d.
+ * terms are scaled by 2/dot(q,q), which keeps d a pure rotation when q is not
+ * unit length; a zero quaternion gives s = 0 and therefore the identity */
+static inline void q_m3x3( v4f q, m3x3f d )
+{
+   f32 n2 = v4_dot(q,q),
+       s  = n2 > 0.0f? 2.0f/n2: 0.0f,
+       xx = s*q[0]*q[0], xy = s*q[0]*q[1], wx = s*q[3]*q[0],
+       yy = s*q[1]*q[1], yz = s*q[1]*q[2], wy = s*q[3]*q[1],
+       zz = s*q[2]*q[2], xz = s*q[0]*q[2], wz = s*q[3]*q[2];
+   d[0][0] = 1.0f - yy - zz;
+   d[1][1] = 1.0f - xx - zz;
+   d[2][2] = 1.0f - xx - yy;
+   d[0][1] = xy + wz;
+   d[1][2] = yz + wx;
+   d[2][0] = xz + wy;
+   d[1][0] = xy - wz;
+   d[2][1] = yz - wx;
+   d[0][2] = xz - wy;
+}
+
+/* d = 2(q.v)q + (q[3]^2 - q.q)v + 2 q[3] (q x v), q. and qx on q[0..2] */
+static void q_mulv( v4f q, v3f v, v3f d )
+{
+   v3f sum, term;
+   v3_muls( q, 2.0f*v3_dot(q,v), sum );
+   v3_muls( v, q[3]*q[3] - v3_dot(q,q), term );
+   v3_add( sum, term, sum );
+   v3_cross( q, v, term );
+   v3_muls( term, 2.0f*q[3], term );
+   v3_add( sum, term, d );
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * Section 4.a                  2x2 matrices
+ * -----------------------------------------------------------------------------
  */
 
-#define M2X2_INDENTIY    {{1.0f, 0.0f, }, \
-                          { 0.0f, 1.0f, }}
+#define M2X2_INDENTIY {{1.0f, 0.0f, }, \
+                       {0.0f, 1.0f, }}
                           
-#define M2X2_ZERO         {{0.0f, 0.0f, }, \
-                        { 0.0f, 0.0f, }}
+#define M2X2_ZERO     {{0.0f, 0.0f, }, \
+                       {0.0f, 0.0f, }}
 
 static inline void m2x2_copy( m2x2f a, m2x2f b )
 {
@@ -538,9 +658,9 @@ static inline void m2x2_identity( m2x2f a )
    m2x2_copy( id, a );
 }
 
-static inline void m2x2_create_rotation( m2x2f a, float theta )
+static inline void m2x2_create_rotation( m2x2f a, f32 theta )
 {
-   float s, c;
+   f32 s, c;
    
    s = sinf( theta );
    c = cosf( theta );
@@ -552,18 +672,83 @@ static inline void m2x2_create_rotation( m2x2f a, float theta )
 }
 
 /*
- * Matrix 3x3
+ * -----------------------------------------------------------------------------
+ * Section 4.b                  3x3 matrices
+ * -----------------------------------------------------------------------------
  */
 
 #define M3X3_IDENTITY   {{1.0f, 0.0f, 0.0f, },\
                         { 0.0f, 1.0f, 0.0f, },\
                         { 0.0f, 0.0f, 1.0f, }}
                         
-#define M3X3_ZERO         {{0.0f, 0.0f, 0.0f, },\
+#define M3X3_ZERO       {{0.0f, 0.0f, 0.0f, },\
                         { 0.0f, 0.0f, 0.0f, },\
                         { 0.0f, 0.0f, 0.0f, }}
 
 
+/* build a 3x3 matrix from three angles in radians. angles[0..2] are
+ * presumably yaw, pitch and roll - confirm against callers */
+static void euler_m3x3( v3f angles, m3x3f d )
+{
+   f32 cy = cosf( angles[0] ), sy = sinf( angles[0] ),
+       cp = cosf( angles[1] ), sp = sinf( angles[1] ),
+       cr = cosf( angles[2] ), sr = sinf( angles[2] );
+
+   d[0][0] =  cy * cr;
+   d[0][1] =  sr;
+   d[0][2] =  sy * cr;
+
+   d[2][0] = -sy * cp;
+   d[2][1] =  sp;
+   d[2][2] =  cy * cp;
+
+   /* middle row comes from v3_cross( row 0, row 2 ) */
+   v3_cross( d[0], d[2], d[1] );
+}
+
+static void m3x3_q( m3x3f m, v4f q )
+{ /* 3x3 matrix m -> quaternion q; m is assumed to be a pure rotation */
+   f32 diag, r, rinv;
+   /* branch on the trace first, then on the largest diagonal entry */
+   diag = m[0][0] + m[1][1] + m[2][2];
+   if( diag >= 0.0f )
+   {
+      r    = sqrtf( 1.0f + diag );
+      rinv = 0.5f / r;
+      q[0] = rinv * (m[1][2] - m[2][1]);
+      q[1] = rinv * (m[2][0] - m[0][2]);
+      q[2] = rinv * (m[0][1] - m[1][0]);
+      q[3] = r    * 0.5f;
+   } 
+   else if( m[0][0] >= m[1][1] && m[0][0] >= m[2][2] )
+   { /* m[0][0] is the largest diagonal entry */
+      r    = sqrtf( 1.0f - m[1][1] - m[2][2] + m[0][0] );
+      rinv = 0.5f / r;
+      q[0] = r    * 0.5f;
+      q[1] = rinv * (m[0][1] + m[1][0]);
+      q[2] = rinv * (m[0][2] + m[2][0]);
+      q[3] = rinv * (m[1][2] - m[2][1]);
+   } 
+   else if( m[1][1] >= m[2][2] )
+   { /* m[1][1] is the largest diagonal entry */
+      r    = sqrtf( 1.0f - m[0][0] - m[2][2] + m[1][1] );
+      rinv = 0.5f / r;
+      q[0] = rinv * (m[0][1] + m[1][0]);
+      q[1] = r    * 0.5f;
+      q[2] = rinv * (m[1][2] + m[2][1]);
+      q[3] = rinv * (m[2][0] - m[0][2]);
+   } 
+   else 
+   { /* m[2][2] is the largest diagonal entry */
+      r    = sqrtf( 1.0f - m[0][0] - m[1][1] + m[2][2] );
+      rinv = 0.5f / r;
+      q[0] = rinv * (m[0][2] + m[2][0]);
+      q[1] = rinv * (m[1][2] + m[2][1]);
+      q[2] = r    * 0.5f;
+      q[3] = rinv * (m[0][1] - m[1][0]);
+   }
+}
+
 /* a X b == [b]T a == ...*/
 static void m3x3_skew_symetric( m3x3f a, v3f v )
 {
@@ -598,7 +783,7 @@ static inline void m3x3_identity( m3x3f a )
    m3x3_copy( id, a );
 }
 
-static void m3x3_diagonal( m3x3f a, float v )
+static void m3x3_diagonal( m3x3f a, f32 v )
 {
    m3x3_identity( a );
    a[0][0] = v;
@@ -621,11 +806,11 @@ static inline void m3x3_zero( m3x3f a )
 
 static inline void m3x3_inv( m3x3f src, m3x3f dest )
 {
-   float a = src[0][0], b = src[0][1], c = src[0][2],
+   f32 a = src[0][0], b = src[0][1], c = src[0][2],
          d = src[1][0], e = src[1][1], f = src[1][2],
          g = src[2][0], h = src[2][1], i = src[2][2];
 
-   float det =    1.f / 
+   f32 det =    1.f / 
                (+a*(e*i-h*f)
                 -b*(d*i-f*g)
                 +c*(d*h-e*g));
@@ -641,7 +826,7 @@ static inline void m3x3_inv( m3x3f src, m3x3f dest )
    dest[2][2] =  (a*e-d*b)*det;
 }
 
-static float m3x3_det( m3x3f m )
+static f32 m3x3_det( m3x3f m )
 {
    return   m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2])
           - m[0][1] * (m[1][0] * m[2][2] - m[1][2] * m[2][0])
@@ -650,7 +835,7 @@ static float m3x3_det( m3x3f m )
 
 static inline void m3x3_transpose( m3x3f src, m3x3f dest )
 {
-   float a = src[0][0], b = src[0][1], c = src[0][2],
+   f32 a = src[0][0], b = src[0][1], c = src[0][2],
          d = src[1][0], e = src[1][1], f = src[1][2],
          g = src[2][0], h = src[2][1], i = src[2][2];
          
@@ -667,7 +852,7 @@ static inline void m3x3_transpose( m3x3f src, m3x3f dest )
 
 static inline void m3x3_mul( m3x3f a, m3x3f b, m3x3f d )
 {
-   float a00 = a[0][0], a01 = a[0][1], a02 = a[0][2],
+   f32 a00 = a[0][0], a01 = a[0][1], a02 = a[0][2],
          a10 = a[1][0], a11 = a[1][1], a12 = a[1][2],
          a20 = a[2][0], a21 = a[2][1], a22 = a[2][2],
 
@@ -698,9 +883,9 @@ static inline void m3x3_mulv( m3x3f m, v3f v, v3f d )
 }
 
 static inline void m3x3_projection( m3x3f dst, 
-      float const left, float const right, float const bottom, float const top )
+      f32 const left, f32 const right, f32 const bottom, f32 const top )
 {
-   float rl, tb;
+   f32 rl, tb;
    
    m3x3_zero( dst );
 
@@ -726,19 +911,19 @@ static inline void m3x3_scale( m3x3f m, v3f v )
    v3_muls( m[2], v[2], m[2] );
 }
 
-static inline void m3x3_scalef( m3x3f m, float f )
+static inline void m3x3_scalef( m3x3f m, f32 f )
 {
    v3f v;
    v3_fill( v, f );
    m3x3_scale( m, v );
 }
 
-static inline void m3x3_rotate( m3x3f m, float angle )
+static inline void m3x3_rotate( m3x3f m, f32 angle )
 {
-   float m00 = m[0][0], m10 = m[1][0],
+   f32 m00 = m[0][0], m10 = m[1][0],
          m01 = m[0][1], m11 = m[1][1],
          m02 = m[0][2], m12 = m[1][2];
-   float c, s;
+   f32 c, s;
 
    s = sinf( angle );
    c = cosf( angle );
@@ -752,73 +937,251 @@ static inline void m3x3_rotate( m3x3f m, float angle )
    m[1][2] = m02 * -s + m12 * c;
 }
 
-static inline void box_addpt( boxf a, v3f pt )
-{
-   v3_minv( a[0], pt, a[0] );
-   v3_maxv( a[1], pt, a[1] );
+/*
+ * -----------------------------------------------------------------------------
+ * Section 4.c                  4x3 matrices
+ * -----------------------------------------------------------------------------
+ */
+
+#define M4X3_IDENTITY   {{1.0f, 0.0f, 0.0f, },\
+                        { 0.0f, 1.0f, 0.0f, },\
+                        { 0.0f, 0.0f, 1.0f, },\
+                        { 0.0f, 0.0f, 0.0f }}
+
+static inline void m4x3_to_3x3( m4x3f a, m3x3f b )
+{
+   v3_copy( a[0], b[0] );
+   v3_copy( a[1], b[1] );
+   v3_copy( a[2], b[2] );
 }
 
-static inline void box_concat( boxf a, boxf b )
+static inline void m4x3_invert_affine( m4x3f a, m4x3f b )
 {
-   v3_minv( a[0], b[0], a[0] );
-   v3_maxv( a[1], b[1], a[1] );
+   m3x3_transpose( a, b );
+   m3x3_mulv( b, a[3], b[3] );
+   v3_negate( b[3], b[3] );
 }
 
-static inline void box_copy( boxf a, boxf b )
+static void m4x3_invert_full( m4x3f src, m4x3f dst )
+{ /* src is read into locals before dst is written, so dst may alias src */
+  f32 t2, t4, t5,
+        det,
+        a = src[0][0], b = src[0][1], c = src[0][2],
+        e = src[1][0], f = src[1][1], g = src[1][2],
+        i = src[2][0], j = src[2][1], k = src[2][2],
+        m = src[3][0], n = src[3][1], o = src[3][2];
+   /* 2x2 terms shared by dst[3][0] and dst[3][1] */
+   t2 = j*o - n*k;
+   t4 = i*o - m*k;
+   t5 = i*n - m*j;
+   
+   dst[0][0] =  f*k - g*j;
+   dst[1][0] =-(e*k - g*i);
+   dst[2][0] =  e*j - f*i;
+   dst[3][0] =-(e*t2 - f*t4 + g*t5);
+   
+   dst[0][1] =-(b*k - c*j);
+   dst[1][1] =  a*k - c*i;
+   dst[2][1] =-(a*j - b*i);
+   dst[3][1] =  a*t2 - b*t4 + c*t5;
+   /* temporaries are recomputed for dst[3][2] */
+   t2 = f*o - n*g;
+   t4 = e*o - m*g; 
+   t5 = e*n - m*f;
+   
+   dst[0][2] =  b*g - c*f ;
+   dst[1][2] =-(a*g - c*e );
+   dst[2][2] =  a*f - b*e ;
+   dst[3][2] =-(a*t2 - b*t4 + c * t5);
+   /* det = 1/determinant of the 3x3 part; a singular src is not guarded */
+   det = 1.0f / (a * dst[0][0] + b * dst[1][0] + c * dst[2][0]);
+   v3_muls( dst[0], det, dst[0] );
+   v3_muls( dst[1], det, dst[1] );
+   v3_muls( dst[2], det, dst[2] );
+   v3_muls( dst[3], det, dst[3] );
+}
+
+static inline void m4x3_copy( m4x3f a, m4x3f b )
 {
    v3_copy( a[0], b[0] );
    v3_copy( a[1], b[1] );
+   v3_copy( a[2], b[2] );
+   v3_copy( a[3], b[3] );
 }
 
-static inline int box_overlap( boxf a, boxf b )
+static inline void m4x3_identity( m4x3f a )
 {
-   return
-   ( a[0][0] <= b[1][0] && a[1][0] >= b[0][0] ) &&
-   ( a[0][1] <= b[1][1] && a[1][1] >= b[0][1] ) &&
-   ( a[0][2] <= b[1][2] && a[1][2] >= b[0][2] )
-   ;
+   m4x3f id = M4X3_IDENTITY;
+   m4x3_copy( id, a );
 }
 
-static int box_within( boxf greater, boxf lesser )
+static void m4x3_mul( m4x3f a, m4x3f b, m4x3f d ) 
 {
-   v3f a, b;
-   v3_sub( lesser[0], greater[0], a );
-   v3_sub( lesser[1], greater[1], b );
+   f32 
+   a00 = a[0][0], a01 = a[0][1], a02 = a[0][2],
+   a10 = a[1][0], a11 = a[1][1], a12 = a[1][2],
+   a20 = a[2][0], a21 = a[2][1], a22 = a[2][2],
+   a30 = a[3][0], a31 = a[3][1], a32 = a[3][2],
+   b00 = b[0][0], b01 = b[0][1], b02 = b[0][2],
+   b10 = b[1][0], b11 = b[1][1], b12 = b[1][2],
+   b20 = b[2][0], b21 = b[2][1], b22 = b[2][2],
+   b30 = b[3][0], b31 = b[3][1], b32 = b[3][2];
+   
+   d[0][0] = a00*b00 + a10*b01 + a20*b02;
+   d[0][1] = a01*b00 + a11*b01 + a21*b02;
+   d[0][2] = a02*b00 + a12*b01 + a22*b02;
+   d[1][0] = a00*b10 + a10*b11 + a20*b12;
+   d[1][1] = a01*b10 + a11*b11 + a21*b12;
+   d[1][2] = a02*b10 + a12*b11 + a22*b12;
+   d[2][0] = a00*b20 + a10*b21 + a20*b22;
+   d[2][1] = a01*b20 + a11*b21 + a21*b22;
+   d[2][2] = a02*b20 + a12*b21 + a22*b22;
+   d[3][0] = a00*b30 + a10*b31 + a20*b32 + a30;
+   d[3][1] = a01*b30 + a11*b31 + a21*b32 + a31;
+   d[3][2] = a02*b30 + a12*b31 + a22*b32 + a32;
+}
 
-   if( (a[0] >= 0.0f) && (a[1] >= 0.0f) && (a[2] >= 0.0f) &&
-       (b[0] <= 0.0f) && (b[1] <= 0.0f) && (b[2] <= 0.0f) )
-   {
-      return 1;
-   }
+#if 0 /* inline disabled: it triggers mingw's -Wstringop-overflow warning */
+inline
+#endif
+static void m4x3_mulv( m4x3f m, v3f v, v3f d ) 
+{
+   v3f res;
+  
+   res[0] = m[0][0]*v[0] + m[1][0]*v[1] + m[2][0]*v[2] + m[3][0];
+   res[1] = m[0][1]*v[0] + m[1][1]*v[1] + m[2][1]*v[2] + m[3][1];
+   res[2] = m[0][2]*v[0] + m[1][2]*v[1] + m[2][2]*v[2] + m[3][2];
 
-   return 0;
+   v3_copy( res, d );
 }
 
-static inline void box_init_inf( boxf box )
+/* 
+ * Transform plane ( xyz, distance )
+ */
+static void m4x3_mulp( m4x3f m, v4f p, v4f d )
 {
-   v3_fill( box[0],  INFINITY );
-   v3_fill( box[1], -INFINITY );
+   v3f o;
+
+   v3_muls( p, p[3], o );
+   m4x3_mulv( m, o, o );
+   m3x3_mulv( m, p, d );
+   
+   d[3] = v3_dot( o, d );
 }
 
-int ray_aabb1( boxf box, v3f co, v3f dir_inv, float dist )
+/*
+ * Affine transforms
+ */
+
+static void m4x3_translate( m4x3f m, v3f v )
 {
-   v3f v0, v1;
-   float tmin, tmax;
+   v3_muladds( m[3], m[0], v[0], m[3] );
+   v3_muladds( m[3], m[1], v[1], m[3] );
+   v3_muladds( m[3], m[2], v[2], m[3] );
+}
 
-   v3_sub( box[0], co, v0 );
-   v3_sub( box[1], co, v1 );
+static void m4x3_rotate_x( m4x3f m, f32 angle )
+{
+   m4x3f t = M4X3_IDENTITY;
+   f32 c, s;
 
-   v3_mul( v0, dir_inv, v0 );
-   v3_mul( v1, dir_inv, v1 );
+   c = cosf( angle );
+   s = sinf( angle );
+
+   t[1][1] =  c;
+   t[1][2] =  s;
+   t[2][1] = -s;
+   t[2][2] =  c;
+
+   m4x3_mul( m, t, m );
+}
+
+/* rotate m in place about the Y axis by `angle` radians: a rotation matrix
+ * R is built and m4x3_mul( m, R, m ) is applied */
+static void m4x3_rotate_y( m4x3f m, f32 angle )
+{
+   f32 cs = cosf( angle ),
+       sn = sinf( angle );
+
+   /* rows 0-2 hold the rotation, row 3 the (zero) translation */
+   m4x3f rot = {{ cs,   0.0f, -sn   },
+                { 0.0f, 1.0f,  0.0f },
+                { sn,   0.0f,  cs   },
+                { 0.0f, 0.0f,  0.0f }};
+
+   m4x3_mul( m, rot, m );
+}
+
+/* rotate m in place about the Z axis by `angle` radians: a rotation matrix
+ * R is built and m4x3_mul( m, R, m ) is applied */
+static void m4x3_rotate_z( m4x3f m, f32 angle )
+{
+   f32 cs = cosf( angle ),
+       sn = sinf( angle );
+
+   /* rows 0-2 hold the rotation, row 3 the (zero) translation */
+   m4x3f rot = {{  cs,   sn,   0.0f },
+                { -sn,   cs,   0.0f },
+                {  0.0f, 0.0f, 1.0f },
+                {  0.0f, 0.0f, 0.0f }};
+
+   m4x3_mul( m, rot, m );
+}
+
+/* widen 4x3 matrix m into 4x4 matrix d: the four rows are copied, then the
+ * new fourth component is set to 0 for rows 0-2 and 1 for row 3 */
+static void m4x3_expand( m4x3f m, m4x4f d )
+{
+   for( int i=0; i<4; i++ )
+      v3_copy( m[i], d[i] );
+
+   for( int i=0; i<3; i++ )
+      d[i][3] = 0.0f;
+   d[3][3] = 1.0f;
+}
+
+/* split m into translation co (row 3), per-row lengths s and a quaternion q
+ * taken from the first three rows after dividing each by its length */
+static void m4x3_decompose( m4x3f m, v3f co, v4f q, v3f s )
+{
+   m3x3f basis;
+   v3_copy( m[3], co );
+   for( int i=0; i<3; i++ )
+      s[i] = v3_length( m[i] );
+
+   for( int i=0; i<3; i++ )
+      v3_divs( m[i], s[i], basis[i] );
+
+   m3x3_q( basis, q );
+}
+
+/* transform point by m and grow box (box[0]=min, box[1]=max) to contain it */
+static void m4x3_expand_aabb_point( m4x3f m, boxf box, v3f point )
+{
+   v3f moved;
+   m4x3_mulv( m, point, moved );
+   v3_maxv( box[1], moved, box[1] );
+   v3_minv( box[0], moved, box[0] );
+}
+
+static void m4x3_transform_aabb( m4x3f m, boxf box )
+{
+   v3f a; v3f b;
    
-   tmin = vg_minf( v0[0], v1[0] );
-   tmax = vg_maxf( v0[0], v1[0] );
-   tmin = vg_maxf( tmin, vg_minf( v0[1], v1[1] ));
-   tmax = vg_minf( tmax, vg_maxf( v0[1], v1[1] ));
-   tmin = vg_maxf( tmin, vg_minf( v0[2], v1[2] ));
-   tmax = vg_minf( tmax, vg_maxf( v0[2], v1[2] ));
+   v3_copy( box[0], a );
+   v3_copy( box[1], b );
+   v3_fill( box[0],  INFINITY );
+   v3_fill( box[1], -INFINITY );
 
-   return (tmax >= tmin) && (tmin <= dist) && (tmax >= 0.0f);
+   m4x3_expand_aabb_point( m, box, (v3f){ a[0], a[1], a[2] } );
+   m4x3_expand_aabb_point( m, box, (v3f){ a[0], b[1], a[2] } );
+   m4x3_expand_aabb_point( m, box, (v3f){ b[0], b[1], a[2] } );
+   m4x3_expand_aabb_point( m, box, (v3f){ b[0], a[1], a[2] } );
+
+   m4x3_expand_aabb_point( m, box, (v3f){ a[0], a[1], b[2] } );
+   m4x3_expand_aabb_point( m, box, (v3f){ a[0], b[1], b[2] } );
+   m4x3_expand_aabb_point( m, box, (v3f){ b[0], b[1], b[2] } );
+   m4x3_expand_aabb_point( m, box, (v3f){ b[0], a[1], b[2] } );
 }
 
 static inline void m4x3_lookat( m4x3f m, v3f pos, v3f target, v3f up )
@@ -837,22 +1200,24 @@ static inline void m4x3_lookat( m4x3f m, v3f pos, v3f target, v3f up )
 }
 
 /*
- * Matrix 4x4
+ * -----------------------------------------------------------------------------
+ * Section 4.d                  4x4 matrices
+ * -----------------------------------------------------------------------------
  */
 
 #define M4X4_IDENTITY   {{1.0f, 0.0f, 0.0f, 0.0f },\
                         { 0.0f, 1.0f, 0.0f, 0.0f },\
                         { 0.0f, 0.0f, 1.0f, 0.0f },\
                         { 0.0f, 0.0f, 0.0f, 1.0f }}
-#define M4X4_ZERO         {{0.0f, 0.0f, 0.0f, 0.0f },\
+#define M4X4_ZERO       {{0.0f, 0.0f, 0.0f, 0.0f },\
                         { 0.0f, 0.0f, 0.0f, 0.0f },\
                         { 0.0f, 0.0f, 0.0f, 0.0f },\
                         { 0.0f, 0.0f, 0.0f, 0.0f }}
 
-static void m4x4_projection( m4x4f m, float angle,
-                             float ratio, float fnear, float ffar )
+static void m4x4_projection( m4x4f m, f32 angle,
+                             f32 ratio, f32 fnear, f32 ffar )
 {
-   float scale = tanf( angle * 0.5f * VG_PIf / 180.0f ) * fnear,
+   f32 scale = tanf( angle * 0.5f * VG_PIf / 180.0f ) * fnear,
          r = ratio * scale,
          l = -r,
          t = scale,
@@ -908,7 +1273,7 @@ static inline void m4x4_zero( m4x4f a )
 
 static inline void m4x4_mul( m4x4f a, m4x4f b, m4x4f d )
 {
-   float a00 = a[0][0], a01 = a[0][1], a02 = a[0][2], a03 = a[0][3],
+   f32 a00 = a[0][0], a01 = a[0][1], a02 = a[0][2], a03 = a[0][3],
          a10 = a[1][0], a11 = a[1][1], a12 = a[1][2], a13 = a[1][3],
          a20 = a[2][0], a21 = a[2][1], a22 = a[2][2], a23 = a[2][3],
          a30 = a[3][0], a31 = a[3][1], a32 = a[3][2], a33 = a[3][3],
@@ -950,7 +1315,7 @@ static inline void m4x4_mulv( m4x4f m, v4f v, v4f d )
 
 static inline void m4x4_inv( m4x4f a, m4x4f d )
 {
-   float a00 = a[0][0], a01 = a[0][1], a02 = a[0][2], a03 = a[0][3],
+   f32 a00 = a[0][0], a01 = a[0][1], a02 = a[0][2], a03 = a[0][3],
          a10 = a[1][0], a11 = a[1][1], a12 = a[1][2], a13 = a[1][3],
          a20 = a[2][0], a21 = a[2][1], a22 = a[2][2], a23 = a[2][3],
          a30 = a[3][0], a31 = a[3][1], a32 = a[3][2], a33 = a[3][3],
@@ -1006,516 +1371,166 @@ static inline void m4x4_inv( m4x4f a, m4x4f d )
 }
 
 /*
- * Planes (double precision)
+ * -----------------------------------------------------------------------------
+ * Section 5.a                       Boxes
+ * -----------------------------------------------------------------------------
  */
-static inline void tri_to_plane( double a[3], double b[3], 
-      double c[3], double p[4] )
+
+static inline void box_addpt( boxf a, v3f pt )
 {
-   double edge0[3];
-   double edge1[3];
-   double l;
-   
-   edge0[0] = b[0] - a[0];
-   edge0[1] = b[1] - a[1];
-   edge0[2] = b[2] - a[2];
-   
-   edge1[0] = c[0] - a[0];
-   edge1[1] = c[1] - a[1];
-   edge1[2] = c[2] - a[2];
-   
-   p[0] = edge0[1] * edge1[2] - edge0[2] * edge1[1];
-   p[1] = edge0[2] * edge1[0] - edge0[0] * edge1[2];
-   p[2] = edge0[0] * edge1[1] - edge0[1] * edge1[0];
-   
-   l = sqrt(p[0] * p[0] + p[1] * p[1] + p[2] * p[2]);
-   p[3] = (p[0] * a[0] + p[1] * a[1] + p[2] * a[2]) / l;
-   
-   p[0] = p[0] / l;
-   p[1] = p[1] / l;
-   p[2] = p[2] / l;
-}
-
-static int plane_intersect3( v4f a, v4f b, v4f c, v3f p )
-{
-   float const epsilon = 1e-6f;
-   
-   v3f x;
-   v3_cross( a, b, x );
-   float d = v3_dot( x, c );
-   
-   if( (d < epsilon) && (d > -epsilon) ) return 0;
-
-   v3f v0, v1, v2;
-   v3_cross( b, c, v0 );
-   v3_cross( c, a, v1 );
-   v3_cross( a, b, v2 );
-
-   v3_muls(       v0, a[3], p );
-   v3_muladds( p, v1, b[3], p );
-   v3_muladds( p, v2, c[3], p );
-   v3_divs( p, d, p );
-   
-   return 1;
+   v3_minv( a[0], pt, a[0] );
+   v3_maxv( a[1], pt, a[1] );
 }
 
-int plane_intersect2( v4f a, v4f b, v3f p, v3f n )
+static inline void box_concat( boxf a, boxf b )
 {
-   float const epsilon = 1e-6f;
-
-   v4f c;
-   v3_cross( a, b, c );
-   float d = v3_length2( c );
-
-   if( (d < epsilon) && (d > -epsilon) ) 
-      return 0;
-
-   v3f v0, v1, vx;
-   v3_cross( c, b, v0 );
-   v3_cross( a, c, v1 );
-
-   v3_muls( v0, a[3], vx );
-   v3_muladds( vx, v1, b[3], vx );
-   v3_divs( vx, d, p );
-   v3_copy( c, n );
-
-   return 1;
+   v3_minv( a[0], b[0], a[0] );
+   v3_maxv( a[1], b[1], a[1] );
 }
 
-static int plane_segment( v4f plane, v3f a, v3f b, v3f co )
+static inline void box_copy( boxf a, boxf b )
 {
-   float d0 = v3_dot( a, plane ) - plane[3],
-         d1 = v3_dot( b, plane ) - plane[3];
-
-   if( d0*d1 < 0.0f )
-   {
-      float tot = 1.0f/( fabsf(d0)+fabsf(d1) );
-
-      v3_muls( a, fabsf(d1) * tot, co );
-      v3_muladds( co, b, fabsf(d0) * tot, co );
-      return 1;
-   }
-
-   return 0;
+   v3_copy( a[0], b[0] );
+   v3_copy( a[1], b[1] );
 }
 
-static inline double plane_polarity( double p[4], double a[3] )
+static inline int box_overlap( boxf a, boxf b )
 {
-   return 
-   (a[0] * p[0] + a[1] * p[1] + a[2] * p[2])
-   -(p[0]*p[3] * p[0] + p[1]*p[3] * p[1] + p[2]*p[3] * p[2])
+   return
+   ( a[0][0] <= b[1][0] && a[1][0] >= b[0][0] ) &&
+   ( a[0][1] <= b[1][1] && a[1][1] >= b[0][1] ) &&
+   ( a[0][2] <= b[1][2] && a[1][2] >= b[0][2] )
    ;
 }
 
-/* Quaternions */
-
-static inline void q_identity( v4f q )
-{
-   q[0] = 0.0f; q[1] = 0.0f; q[2] = 0.0f; q[3] = 1.0f;
-}
-
-static inline void q_axis_angle( v4f q, v3f axis, float angle )
-{
-   float a = angle*0.5f,
-         c = cosf(a),
-         s = sinf(a);
-
-   q[0] = s*axis[0];
-   q[1] = s*axis[1];
-   q[2] = s*axis[2];
-   q[3] = c;
-}
-
-static inline void q_mul( v4f q, v4f q1, v4f d )
-{
-   v4f t;
-   t[0] = q[3]*q1[0] + q[0]*q1[3] + q[1]*q1[2] - q[2]*q1[1];
-   t[1] = q[3]*q1[1] - q[0]*q1[2] + q[1]*q1[3] + q[2]*q1[0];
-   t[2] = q[3]*q1[2] + q[0]*q1[1] - q[1]*q1[0] + q[2]*q1[3];
-   t[3] = q[3]*q1[3] - q[0]*q1[0] - q[1]*q1[1] - q[2]*q1[2];
-   v4_copy( t, d );
-}
-
-static inline void q_normalize( v4f q )
-{
-   float s = 1.0f/ sqrtf(v4_dot(q,q));
-   q[0] *= s;
-   q[1] *= s;
-   q[2] *= s;
-   q[3] *= s;
-}
-
-static inline void q_inv( v4f q, v4f d )
-{
-   float s = 1.0f / v4_dot(q,q);
-   d[0] = -q[0]*s;
-   d[1] = -q[1]*s;
-   d[2] = -q[2]*s;
-   d[3] =  q[3]*s;
-}
-
-static inline void q_nlerp( v4f a, v4f b, float t, v4f d )
-{
-   if( v4_dot(a,b) < 0.0f ){
-      v4_muls( b, -1.0f, d );
-      v4_lerp( a, d, t, d );
-   }
-   else
-      v4_lerp( a, b, t, d );
-
-   q_normalize( d );
-}
-
-static void euler_m3x3( v3f angles, m3x3f d )
-{
-   float cosY = cosf( angles[0] ),
-         sinY = sinf( angles[0] ),
-         cosP = cosf( angles[1] ),
-         sinP = sinf( angles[1] ),
-         cosR = cosf( angles[2] ),
-         sinR = sinf( angles[2] );
-
-   d[2][0] = -sinY * cosP;
-   d[2][1] =  sinP;
-   d[2][2] =  cosY * cosP;
-
-   d[0][0] =  cosY * cosR;
-   d[0][1] =  sinR;
-   d[0][2] =  sinY * cosR;
-
-   v3_cross( d[0], d[2], d[1] );
-}
-
-static inline void q_m3x3( v4f q, m3x3f d )
-{
-   float
-      l = v4_length(q),
-      s = l > 0.0f? 2.0f/l: 0.0f,
-
-      xx = s*q[0]*q[0], xy = s*q[0]*q[1], wx = s*q[3]*q[0],
-      yy = s*q[1]*q[1], yz = s*q[1]*q[2], wy = s*q[3]*q[1],
-      zz = s*q[2]*q[2], xz = s*q[0]*q[2], wz = s*q[3]*q[2];
-
-   d[0][0] = 1.0f - yy - zz;
-   d[1][1] = 1.0f - xx - zz;
-   d[2][2] = 1.0f - xx - yy;
-   d[0][1] = xy + wz;
-   d[1][2] = yz + wx;
-   d[2][0] = xz + wy;
-   d[1][0] = xy - wz;
-   d[2][1] = yz - wx;
-   d[0][2] = xz - wy;
-}
-
-static void m3x3_q( m3x3f m, v4f q )
+static int box_within( boxf greater, boxf lesser )
 {
-   float diag, r, rinv;
+   v3f a, b;
+   v3_sub( lesser[0], greater[0], a );
+   v3_sub( lesser[1], greater[1], b );
 
-   diag = m[0][0] + m[1][1] + m[2][2];
-   if( diag >= 0.0f )
-   {
-      r    = sqrtf( 1.0f + diag );
-      rinv = 0.5f / r;
-      q[0] = rinv * (m[1][2] - m[2][1]);
-      q[1] = rinv * (m[2][0] - m[0][2]);
-      q[2] = rinv * (m[0][1] - m[1][0]);
-      q[3] = r    * 0.5f;
-   } 
-   else if( m[0][0] >= m[1][1] && m[0][0] >= m[2][2] )
-   {
-      r    = sqrtf( 1.0f - m[1][1] - m[2][2] + m[0][0] );
-      rinv = 0.5f / r;
-      q[0] = r    * 0.5f;
-      q[1] = rinv * (m[0][1] + m[1][0]);
-      q[2] = rinv * (m[0][2] + m[2][0]);
-      q[3] = rinv * (m[1][2] - m[2][1]);
-   } 
-   else if( m[1][1] >= m[2][2] )
-   {
-      r    = sqrtf( 1.0f - m[0][0] - m[2][2] + m[1][1] );
-      rinv = 0.5f / r;
-      q[0] = rinv * (m[0][1] + m[1][0]);
-      q[1] = r    * 0.5f;
-      q[2] = rinv * (m[1][2] + m[2][1]);
-      q[3] = rinv * (m[2][0] - m[0][2]);
-   } 
-   else 
+   if( (a[0] >= 0.0f) && (a[1] >= 0.0f) && (a[2] >= 0.0f) &&
+       (b[0] <= 0.0f) && (b[1] <= 0.0f) && (b[2] <= 0.0f) )
    {
-      r    = sqrtf( 1.0f - m[0][0] - m[1][1] + m[2][2] );
-      rinv = 0.5f / r;
-      q[0] = rinv * (m[0][2] + m[2][0]);
-      q[1] = rinv * (m[1][2] + m[2][1]);
-      q[2] = r    * 0.5f;
-      q[3] = rinv * (m[0][1] - m[1][0]);
+      return 1;
    }
-}
-
-static void q_mulv( v4f q, v3f v, v3f d )
-{
-   v3f v1, v2;
-
-   v3_muls( q, 2.0f*v3_dot(q,v), v1 );
-   v3_muls( v, q[3]*q[3] - v3_dot(q,q), v2 );
-   v3_add( v1, v2, v1 );
-   v3_cross( q, v, v2 );
-   v3_muls( v2, 2.0f*q[3], v2 );
-   v3_add( v1, v2, d );
-}
-
-enum contact_type
-{
-   k_contact_type_default,
-   k_contact_type_disabled,
-   k_contact_type_edge
-};
-
-/*
- * Matrix 4x3
- */
-
-#define M4X3_IDENTITY   {{1.0f, 0.0f, 0.0f, },\
-                        { 0.0f, 1.0f, 0.0f, },\
-                        { 0.0f, 0.0f, 1.0f, },\
-                        { 0.0f, 0.0f, 0.0f }}
-
-static inline void m4x3_to_3x3( m4x3f a, m3x3f b )
-{
-   v3_copy( a[0], b[0] );
-   v3_copy( a[1], b[1] );
-   v3_copy( a[2], b[2] );
-}
-
-static inline void m4x3_invert_affine( m4x3f a, m4x3f b )
-{
-   m3x3_transpose( a, b );
-   m3x3_mulv( b, a[3], b[3] );
-   v3_negate( b[3], b[3] );
-}
-
-static void m4x3_invert_full( m4x3f src, m4x3f dst )
-{
-  float t2, t4, t5,
-        det,
-        a = src[0][0], b = src[0][1], c = src[0][2],
-        e = src[1][0], f = src[1][1], g = src[1][2],
-        i = src[2][0], j = src[2][1], k = src[2][2],
-        m = src[3][0], n = src[3][1], o = src[3][2];
-
-   t2 = j*o - n*k;
-   t4 = i*o - m*k;
-   t5 = i*n - m*j;
-   
-   dst[0][0] =  f*k - g*j;
-   dst[1][0] =-(e*k - g*i);
-   dst[2][0] =  e*j - f*i;
-   dst[3][0] =-(e*t2 - f*t4 + g*t5);
-   
-   dst[0][1] =-(b*k - c*j);
-   dst[1][1] =  a*k - c*i;
-   dst[2][1] =-(a*j - b*i);
-   dst[3][1] =  a*t2 - b*t4 + c*t5;
-   
-   t2 = f*o - n*g;
-   t4 = e*o - m*g; 
-   t5 = e*n - m*f;
-   
-   dst[0][2] =  b*g - c*f ;
-   dst[1][2] =-(a*g - c*e );
-   dst[2][2] =  a*f - b*e ;
-   dst[3][2] =-(a*t2 - b*t4 + c * t5);
-
-   det = 1.0f / (a * dst[0][0] + b * dst[1][0] + c * dst[2][0]);
-   v3_muls( dst[0], det, dst[0] );
-   v3_muls( dst[1], det, dst[1] );
-   v3_muls( dst[2], det, dst[2] );
-   v3_muls( dst[3], det, dst[3] );
-}
-
-static inline void m4x3_copy( m4x3f a, m4x3f b )
-{
-   v3_copy( a[0], b[0] );
-   v3_copy( a[1], b[1] );
-   v3_copy( a[2], b[2] );
-   v3_copy( a[3], b[3] );
-}
 
-static inline void m4x3_identity( m4x3f a )
-{
-   m4x3f id = M4X3_IDENTITY;
-   m4x3_copy( id, a );
-}
-
-static void m4x3_mul( m4x3f a, m4x3f b, m4x3f d ) 
-{
-   float 
-   a00 = a[0][0], a01 = a[0][1], a02 = a[0][2],
-   a10 = a[1][0], a11 = a[1][1], a12 = a[1][2],
-   a20 = a[2][0], a21 = a[2][1], a22 = a[2][2],
-   a30 = a[3][0], a31 = a[3][1], a32 = a[3][2],
-   b00 = b[0][0], b01 = b[0][1], b02 = b[0][2],
-   b10 = b[1][0], b11 = b[1][1], b12 = b[1][2],
-   b20 = b[2][0], b21 = b[2][1], b22 = b[2][2],
-   b30 = b[3][0], b31 = b[3][1], b32 = b[3][2];
-   
-   d[0][0] = a00*b00 + a10*b01 + a20*b02;
-   d[0][1] = a01*b00 + a11*b01 + a21*b02;
-   d[0][2] = a02*b00 + a12*b01 + a22*b02;
-   d[1][0] = a00*b10 + a10*b11 + a20*b12;
-   d[1][1] = a01*b10 + a11*b11 + a21*b12;
-   d[1][2] = a02*b10 + a12*b11 + a22*b12;
-   d[2][0] = a00*b20 + a10*b21 + a20*b22;
-   d[2][1] = a01*b20 + a11*b21 + a21*b22;
-   d[2][2] = a02*b20 + a12*b21 + a22*b22;
-   d[3][0] = a00*b30 + a10*b31 + a20*b32 + a30;
-   d[3][1] = a01*b30 + a11*b31 + a21*b32 + a31;
-   d[3][2] = a02*b30 + a12*b31 + a22*b32 + a32;
-}
-
-#if 0 /* shat appf mingw wstringop-overflow */
-inline
-#endif
-static void m4x3_mulv( m4x3f m, v3f v, v3f d ) 
-{
-   v3f res;
-  
-   res[0] = m[0][0]*v[0] + m[1][0]*v[1] + m[2][0]*v[2] + m[3][0];
-   res[1] = m[0][1]*v[0] + m[1][1]*v[1] + m[2][1]*v[2] + m[3][1];
-   res[2] = m[0][2]*v[0] + m[1][2]*v[1] + m[2][2]*v[2] + m[3][2];
-
-   v3_copy( res, d );
+   return 0;
 }
 
-/* 
- * Transform plane ( xyz, distance )
- */
-static void m4x3_mulp( m4x3f m, v4f p, v4f d )
+static inline void box_init_inf( boxf box )
 {
-   v3f o;
-
-   v3_muls( p, p[3], o );
-   m4x3_mulv( m, o, o );
-   m3x3_mulv( m, p, d );
-   
-   d[3] = v3_dot( o, d );
+   v3_fill( box[0],  INFINITY );
+   v3_fill( box[1], -INFINITY );
 }
 
 /*
- * Affine transforms
+ * -----------------------------------------------------------------------------
+ * Section 5.b                       Planes
+ * -----------------------------------------------------------------------------
  */
 
-static void m4x3_translate( m4x3f m, v3f v )
+static inline void tri_to_plane( f64 a[3], f64 b[3], 
+      f64 c[3], f64 p[4] )
 {
-   v3_muladds( m[3], m[0], v[0], m[3] );
-   v3_muladds( m[3], m[1], v[1], m[3] );
-   v3_muladds( m[3], m[2], v[2], m[3] );
+   f64 edge0[3];
+   f64 edge1[3];
+   f64 l;
+   
+   edge0[0] = b[0] - a[0];
+   edge0[1] = b[1] - a[1];
+   edge0[2] = b[2] - a[2];
+   
+   edge1[0] = c[0] - a[0];
+   edge1[1] = c[1] - a[1];
+   edge1[2] = c[2] - a[2];
+   
+   p[0] = edge0[1] * edge1[2] - edge0[2] * edge1[1];
+   p[1] = edge0[2] * edge1[0] - edge0[0] * edge1[2];
+   p[2] = edge0[0] * edge1[1] - edge0[1] * edge1[0];
+   
+   l = sqrt(p[0] * p[0] + p[1] * p[1] + p[2] * p[2]);
+   p[3] = (p[0] * a[0] + p[1] * a[1] + p[2] * a[2]) / l;
+   
+   p[0] = p[0] / l;
+   p[1] = p[1] / l;
+   p[2] = p[2] / l;
 }
 
-static void m4x3_rotate_x( m4x3f m, float angle )
+static int plane_intersect3( v4f a, v4f b, v4f c, v3f p )
 {
-   m4x3f t = M4X3_IDENTITY;
-   float c, s;
-
-   c = cosf( angle );
-   s = sinf( angle );
+   f32 const epsilon = 1e-6f;
+   
+   v3f x;
+   v3_cross( a, b, x );
+   f32 d = v3_dot( x, c );
+   
+   if( (d < epsilon) && (d > -epsilon) ) return 0;
 
-   t[1][1] =  c;
-   t[1][2] =  s;
-   t[2][1] = -s;
-   t[2][2] =  c;
+   v3f v0, v1, v2;
+   v3_cross( b, c, v0 );
+   v3_cross( c, a, v1 );
+   v3_cross( a, b, v2 );
 
-   m4x3_mul( m, t, m );
+   v3_muls(       v0, a[3], p );
+   v3_muladds( p, v1, b[3], p );
+   v3_muladds( p, v2, c[3], p );
+   v3_divs( p, d, p );
+   
+   return 1;
 }
 
-static void m4x3_rotate_y( m4x3f m, float angle )
+int plane_intersect2( v4f a, v4f b, v3f p, v3f n )
 {
-   m4x3f t = M4X3_IDENTITY;
-   float c, s;
-
-   c = cosf( angle );
-   s = sinf( angle );
-
-   t[0][0] =  c;
-   t[0][2] = -s;
-   t[2][0] =  s;
-   t[2][2] =  c;
+   f32 const epsilon = 1e-6f;
 
-   m4x3_mul( m, t, m );
-}
-
-static void m4x3_rotate_z( m4x3f m, float angle )
-{
-   m4x3f t = M4X3_IDENTITY;
-   float c, s;
+   v4f c;
+   v3_cross( a, b, c );
+   f32 d = v3_length2( c );
 
-   c = cosf( angle );
-   s = sinf( angle );
+   if( (d < epsilon) && (d > -epsilon) ) 
+      return 0;
 
-   t[0][0] =  c;
-   t[0][1] =  s;
-   t[1][0] = -s;
-   t[1][1] =  c;
+   v3f v0, v1, vx;
+   v3_cross( c, b, v0 );
+   v3_cross( a, c, v1 );
 
-   m4x3_mul( m, t, m );
-}
+   v3_muls( v0, a[3], vx );
+   v3_muladds( vx, v1, b[3], vx );
+   v3_divs( vx, d, p );
+   v3_copy( c, n );
 
-static void m4x3_expand( m4x3f m, m4x4f d )
-{
-   v3_copy( m[0], d[0] );
-   v3_copy( m[1], d[1] );
-   v3_copy( m[2], d[2] );
-   v3_copy( m[3], d[3] );
-   d[0][3] = 0.0f;
-   d[1][3] = 0.0f;
-   d[2][3] = 0.0f;
-   d[3][3] = 1.0f;
+   return 1;
 }
 
-static void m4x3_decompose( m4x3f m, v3f co, v4f q, v3f s )
+static int plane_segment( v4f plane, v3f a, v3f b, v3f co )
 {
-   v3_copy( m[3], co );
-   s[0] = v3_length(m[0]);
-   s[1] = v3_length(m[1]);
-   s[2] = v3_length(m[2]);
-
-   m3x3f rot;
-   v3_divs( m[0], s[0], rot[0] );
-   v3_divs( m[1], s[1], rot[1] );
-   v3_divs( m[2], s[2], rot[2] );
+   f32 d0 = v3_dot( a, plane ) - plane[3],
+         d1 = v3_dot( b, plane ) - plane[3];
 
-   m3x3_q( rot, q );
-}
+   if( d0*d1 < 0.0f )
+   {
+      f32 tot = 1.0f/( fabsf(d0)+fabsf(d1) );
 
-static void m4x3_expand_aabb_point( m4x3f m, boxf box, v3f point )
-{
-   v3f v;
-   m4x3_mulv( m, point, v );
+      v3_muls( a, fabsf(d1) * tot, co );
+      v3_muladds( co, b, fabsf(d0) * tot, co );
+      return 1;
+   }
 
-   v3_minv( box[0], v, box[0] );
-   v3_maxv( box[1], v, box[1] );
+   return 0;
 }
 
-static void m4x3_transform_aabb( m4x3f m, boxf box )
+static inline f64 plane_polarity( f64 p[4], f64 a[3] )
 {
-   v3f a; v3f b;
-   
-   v3_copy( box[0], a );
-   v3_copy( box[1], b );
-   v3_fill( box[0],  INFINITY );
-   v3_fill( box[1], -INFINITY );
-
-   m4x3_expand_aabb_point( m, box, (v3f){ a[0], a[1], a[2] } );
-   m4x3_expand_aabb_point( m, box, (v3f){ a[0], b[1], a[2] } );
-   m4x3_expand_aabb_point( m, box, (v3f){ b[0], b[1], a[2] } );
-   m4x3_expand_aabb_point( m, box, (v3f){ b[0], a[1], a[2] } );
-
-   m4x3_expand_aabb_point( m, box, (v3f){ a[0], a[1], b[2] } );
-   m4x3_expand_aabb_point( m, box, (v3f){ a[0], b[1], b[2] } );
-   m4x3_expand_aabb_point( m, box, (v3f){ b[0], b[1], b[2] } );
-   m4x3_expand_aabb_point( m, box, (v3f){ b[0], a[1], b[2] } );
+   return 
+   (a[0] * p[0] + a[1] * p[1] + a[2] * p[2])
+   -(p[0]*p[3] * p[0] + p[1]*p[3] * p[1] + p[2]*p[3] * p[2])
+   ;
 }
 
 /*
  * -----------------------------------------------------------------------------
- *                        Closest point functions
+ * Section 5.c            Closest point functions
  * -----------------------------------------------------------------------------
  */
 
@@ -1523,19 +1538,19 @@ static void m4x3_transform_aabb( m4x3f m, boxf box )
  * These closest point tests were learned from Real-Time Collision Detection by 
  * Christer Ericson 
  */
-VG_STATIC float closest_segment_segment( v3f p1, v3f q1, v3f p2, v3f q2, 
-   float *s, float *t, v3f c1, v3f c2)
+VG_STATIC f32 closest_segment_segment( v3f p1, v3f q1, v3f p2, v3f q2, 
+   f32 *s, f32 *t, v3f c1, v3f c2)
 {
    v3f d1,d2,r;
    v3_sub( q1, p1, d1 );
    v3_sub( q2, p2, d2 );
    v3_sub( p1, p2, r );
 
-   float a = v3_length2( d1 ),
+   f32 a = v3_length2( d1 ),
          e = v3_length2( d2 ),
          f = v3_dot( d2, r );
 
-   const float kEpsilon = 0.0001f;
+   const f32 kEpsilon = 0.0001f;
 
    if( a <= kEpsilon && e <= kEpsilon )
    {
@@ -1557,7 +1572,7 @@ VG_STATIC float closest_segment_segment( v3f p1, v3f q1, v3f p2, v3f q2,
    }
    else
    {
-      float c = v3_dot( d1, r );
+      f32 c = v3_dot( d1, r );
       if( e <= kEpsilon )
       {
          *t = 0.0f;
@@ -1565,7 +1580,7 @@ VG_STATIC float closest_segment_segment( v3f p1, v3f q1, v3f p2, v3f q2,
       }
       else
       {
-         float b = v3_dot(d1,d2),
+         f32 b = v3_dot(d1,d2),
                d = a*e-b*b;
 
          if( d != 0.0f )
@@ -1624,13 +1639,13 @@ VG_STATIC void closest_point_obb( v3f p, boxf box,
    m4x3_mulv( mtx, local, dest );
 }
 
-VG_STATIC float closest_point_segment( v3f a, v3f b, v3f point, v3f dest )
+VG_STATIC f32 closest_point_segment( v3f a, v3f b, v3f point, v3f dest )
 {
    v3f v0, v1;
    v3_sub( b, a, v0 );
    v3_sub( point, a, v1 );
 
-   float t = v3_dot( v1, v0 ) / v3_length2(v0);
+   f32 t = v3_dot( v1, v0 ) / v3_length2(v0);
    t = vg_clampf(t,0.0f,1.0f);
    v3_muladds( a, v0, t, dest );
    return t;
@@ -1639,7 +1654,7 @@ VG_STATIC float closest_point_segment( v3f a, v3f b, v3f point, v3f dest )
 VG_STATIC void closest_on_triangle( v3f p, v3f tri[3], v3f dest )
 {
    v3f ab, ac, ap;
-   float d1, d2;
+   f32 d1, d2;
 
    /* Region outside A */
    v3_sub( tri[1], tri[0], ab );
@@ -1657,7 +1672,7 @@ VG_STATIC void closest_on_triangle( v3f p, v3f tri[3], v3f dest )
 
    /* Region outside B */
    v3f bp;
-   float d3, d4;
+   f32 d3, d4;
 
    v3_sub( p, tri[1], bp );
    d3 = v3_dot( ab, bp );
@@ -1671,10 +1686,10 @@ VG_STATIC void closest_on_triangle( v3f p, v3f tri[3], v3f dest )
    }
    
    /* Edge region of AB */
-   float vc = d1*d4 - d3*d2;
+   f32 vc = d1*d4 - d3*d2;
    if( vc <= 0.0f && d1 >= 0.0f && d3 <= 0.0f )
    {
-      float v = d1 / (d1-d3);
+      f32 v = d1 / (d1-d3);
       v3_muladds( tri[0], ab, v, dest );
       v3_copy( (v3f){INFINITY,INFINITY,INFINITY}, dest );
       return;
@@ -1682,7 +1697,7 @@ VG_STATIC void closest_on_triangle( v3f p, v3f tri[3], v3f dest )
 
    /* Region outside C */
    v3f cp;
-   float d5, d6;
+   f32 d5, d6;
    v3_sub( p, tri[2], cp );
    d5 = v3_dot(ab, cp);
    d6 = v3_dot(ac, cp);
@@ -1695,20 +1710,20 @@ VG_STATIC void closest_on_triangle( v3f p, v3f tri[3], v3f dest )
    }
 
    /* Region of AC */
-   float vb = d5*d2 - d1*d6;
+   f32 vb = d5*d2 - d1*d6;
    if( vb <= 0.0f && d2 >= 0.0f && d6 <= 0.0f )
    {
-      float w = d2 / (d2-d6);
+      f32 w = d2 / (d2-d6);
       v3_muladds( tri[0], ac, w, dest );
       v3_copy( (v3f){INFINITY,INFINITY,INFINITY}, dest );
       return;
    }
 
    /* Region of BC */
-   float va = d3*d6 - d5*d4;
+   f32 va = d3*d6 - d5*d4;
    if( va <= 0.0f && (d4-d3) >= 0.0f && (d5-d6) >= 0.0f )
    {
-      float w = (d4-d3) / ((d4-d3) + (d5-d6));
+      f32 w = (d4-d3) / ((d4-d3) + (d5-d6));
       v3f bc;
       v3_sub( tri[2], tri[1], bc );
       v3_muladds( tri[1], bc, w, dest );
@@ -1717,7 +1732,7 @@ VG_STATIC void closest_on_triangle( v3f p, v3f tri[3], v3f dest )
    }
 
    /* P inside region, Q via barycentric coordinates uvw */
-   float d = 1.0f/(va+vb+vc),
+   f32 d = 1.0f/(va+vb+vc),
          v = vb*d,
          w = vc*d;
 
@@ -1725,10 +1740,17 @@ VG_STATIC void closest_on_triangle( v3f p, v3f tri[3], v3f dest )
    v3_muladds( dest, ac, w, dest );
 }
 
+enum contact_type
+{
+   k_contact_type_default,
+   k_contact_type_disabled,
+   k_contact_type_edge
+};
+
 VG_STATIC enum contact_type closest_on_triangle_1( v3f p, v3f tri[3], v3f dest )
 {
    v3f ab, ac, ap;
-   float d1, d2;
+   f32 d1, d2;
 
    /* Region outside A */
    v3_sub( tri[1], tri[0], ab );
@@ -1745,7 +1767,7 @@ VG_STATIC enum contact_type closest_on_triangle_1( v3f p, v3f tri[3], v3f dest )
 
    /* Region outside B */
    v3f bp;
-   float d3, d4;
+   f32 d3, d4;
 
    v3_sub( p, tri[1], bp );
    d3 = v3_dot( ab, bp );
@@ -1758,17 +1780,17 @@ VG_STATIC enum contact_type closest_on_triangle_1( v3f p, v3f tri[3], v3f dest )
    }
    
    /* Edge region of AB */
-   float vc = d1*d4 - d3*d2;
+   f32 vc = d1*d4 - d3*d2;
    if( vc <= 0.0f && d1 >= 0.0f && d3 <= 0.0f )
    {
-      float v = d1 / (d1-d3);
+      f32 v = d1 / (d1-d3);
       v3_muladds( tri[0], ab, v, dest );
       return k_contact_type_edge;
    }
 
    /* Region outside C */
    v3f cp;
-   float d5, d6;
+   f32 d5, d6;
    v3_sub( p, tri[2], cp );
    d5 = v3_dot(ab, cp);
    d6 = v3_dot(ac, cp);
@@ -1780,19 +1802,19 @@ VG_STATIC enum contact_type closest_on_triangle_1( v3f p, v3f tri[3], v3f dest )
    }
 
    /* Region of AC */
-   float vb = d5*d2 - d1*d6;
+   f32 vb = d5*d2 - d1*d6;
    if( vb <= 0.0f && d2 >= 0.0f && d6 <= 0.0f )
    {
-      float w = d2 / (d2-d6);
+      f32 w = d2 / (d2-d6);
       v3_muladds( tri[0], ac, w, dest );
       return k_contact_type_edge;
    }
 
    /* Region of BC */
-   float va = d3*d6 - d5*d4;
+   f32 va = d3*d6 - d5*d4;
    if( va <= 0.0f && (d4-d3) >= 0.0f && (d5-d6) >= 0.0f )
    {
-      float w = (d4-d3) / ((d4-d3) + (d5-d6));
+      f32 w = (d4-d3) / ((d4-d3) + (d5-d6));
       v3f bc;
       v3_sub( tri[2], tri[1], bc );
       v3_muladds( tri[1], bc, w, dest );
@@ -1800,7 +1822,7 @@ VG_STATIC enum contact_type closest_on_triangle_1( v3f p, v3f tri[3], v3f dest )
    }
 
    /* P inside region, Q via barycentric coordinates uvw */
-   float d = 1.0f/(va+vb+vc),
+   f32 d = 1.0f/(va+vb+vc),
          v = vb*d,
          w = vc*d;
 
@@ -1810,7 +1832,6 @@ VG_STATIC enum contact_type closest_on_triangle_1( v3f p, v3f tri[3], v3f dest )
    return k_contact_type_default;
 }
 
-
 static void closest_point_elipse( v2f p, v2f e, v2f o )
 {
    v2f pabs, ei, e2, ve, t;
@@ -1822,8 +1843,7 @@ static void closest_point_elipse( v2f p, v2f e, v2f o )
 
    v2_fill( t, 0.70710678118654752f );
 
-   for( int i=0; i<3; i++ )
-   {
+   for( int i=0; i<3; i++ ){
       v2f v, u, ud, w;
 
       v2_mul( ve, t, v );  /* ve*t*t*t */
@@ -1850,19 +1870,42 @@ static void closest_point_elipse( v2f p, v2f e, v2f o )
 }
 
 /*
- * Raycasts
+ * -----------------------------------------------------------------------------
+ * Section 5.d               Raycasts & Spherecasts
+ * -----------------------------------------------------------------------------
  */
 
+int ray_aabb1( boxf box, v3f co, v3f dir_inv, f32 dist )
+{
+   v3f v0, v1;
+   f32 tmin, tmax;
+
+   v3_sub( box[0], co, v0 );
+   v3_sub( box[1], co, v1 );
+
+   v3_mul( v0, dir_inv, v0 );
+   v3_mul( v1, dir_inv, v1 );
+   
+   tmin = vg_minf( v0[0], v1[0] );
+   tmax = vg_maxf( v0[0], v1[0] );
+   tmin = vg_maxf( tmin, vg_minf( v0[1], v1[1] ));
+   tmax = vg_minf( tmax, vg_maxf( v0[1], v1[1] ));
+   tmin = vg_maxf( tmin, vg_minf( v0[2], v1[2] ));
+   tmax = vg_minf( tmax, vg_maxf( v0[2], v1[2] ));
+
+   return (tmax >= tmin) && (tmin <= dist) && (tmax >= 0.0f);
+}
+
 /* Time of intersection with ray vs triangle */
 static int ray_tri( v3f tri[3], v3f co, 
-                    v3f dir, float *dist )
+                    v3f dir, f32 *dist )
 {
-   float const kEpsilon = 0.00001f;
+   f32 const kEpsilon = 0.00001f;
 
    v3f v0, v1, h, s, q, n;
-   float a,f,u,v,t;
+   f32 a,f,u,v,t;
 
-   float *pa = tri[0],
+   f32 *pa = tri[0],
          *pb = tri[1],
          *pc = tri[2];
 
@@ -1902,20 +1945,20 @@ static int ray_tri( v3f tri[3], v3f co,
 }
 
 /* time of intersection with ray vs sphere */
-static int ray_sphere( v3f c, float r, 
-                       v3f co, v3f dir, float *t )
+static int ray_sphere( v3f c, f32 r, 
+                       v3f co, v3f dir, f32 *t )
 {
    v3f m;
    v3_sub( co, c, m );
 
-   float b  = v3_dot( m, dir ),
+   f32 b  = v3_dot( m, dir ),
          c1 = v3_dot( m, m ) - r*r;
 
    /* Exit if r’s origin outside s (c > 0) and r pointing away from s (b > 0) */
    if( c1 > 0.0f && b > 0.0f ) 
       return 0;
    
-   float discr = b*b - c1;
+   f32 discr = b*b - c1;
 
    /* A negative discriminant corresponds to ray missing sphere */
    if( discr < 0.0f )
@@ -1941,8 +1984,8 @@ static int ray_sphere( v3f c, float r,
  * Heavily adapted from regular segment vs cylinder from:
  *    Real-Time Collision Detection
  */
-static int ray_uncapped_finite_cylinder( v3f q, v3f p, float r, 
-                                         v3f co, v3f dir, float *t )
+static int ray_uncapped_finite_cylinder( v3f q, v3f p, f32 r, 
+                                         v3f co, v3f dir, f32 *t )
 {
    v3f d, m, n, sb;
    v3_muladds( co, dir, 1.0f, sb );
@@ -1951,7 +1994,7 @@ static int ray_uncapped_finite_cylinder( v3f q, v3f p, float r,
    v3_sub( co, p, m );
    v3_sub( sb, co, n );
    
-   float md = v3_dot( m, d ),
+   f32 md = v3_dot( m, d ),
          nd = v3_dot( n, d ),
          dd = v3_dot( d, d ),
          nn = v3_dot( n, n ),
@@ -1966,7 +2009,7 @@ static int ray_uncapped_finite_cylinder( v3f q, v3f p, float r,
       return 0;
    }
 
-   float b     = dd*mn - nd*md,
+   f32 b     = dd*mn - nd*md,
          discr = b*b - a*c;
 
    if( discr < 0.0f ) 
@@ -1992,7 +2035,7 @@ static int ray_uncapped_finite_cylinder( v3f q, v3f p, float r,
  * colliding area. This is a fairly long procedure.
  */
 static int spherecast_triangle( v3f tri[3],
-                                v3f co, v3f dir, float r, float *t, v3f n )
+                                v3f co, v3f dir, f32 r, f32 *t, v3f n )
 {
    v3f sum[3];
    v3f v0, v1;
@@ -2006,11 +2049,10 @@ static int spherecast_triangle( v3f tri[3],
    v3_muladds( tri[2], n, r, sum[2] );
 
    int hit = 0;
-   float t_min = INFINITY,
+   f32 t_min = INFINITY,
          t1;
 
-   if( ray_tri( sum, co, dir, &t1 ) )
-   {
+   if( ray_tri( sum, co, dir, &t1 ) ){
       t_min = vg_minf( t_min, t1 );
       hit = 1;
    }
@@ -2019,25 +2061,20 @@ static int spherecast_triangle( v3f tri[3],
     * Currently disabled; ray_sphere requires |d| = 1. it is not very important.
     */
 #if 0
-   for( int i=0; i<3; i++ )
-   {
-      if( ray_sphere( tri[i], r, co, dir, &t1 ) )
-      {
+   for( int i=0; i<3; i++ ){
+      if( ray_sphere( tri[i], r, co, dir, &t1 ) ){
          t_min = vg_minf( t_min, t1 );
          hit = 1;
       }
    }
 #endif
 
-   for( int i=0; i<3; i++ )
-   {
+   for( int i=0; i<3; i++ ){
       int i0 =  i,
           i1 = (i+1)%3;
 
-      if( ray_uncapped_finite_cylinder( tri[i0], tri[i1], r, co, dir, &t1 ) )
-      {
-         if( t1 < t_min )
-         {
+      if( ray_uncapped_finite_cylinder( tri[i0], tri[i1], r, co, dir, &t1 ) ){
+         if( t1 < t_min ){
             t_min = t1;
             
             v3f co1, ct, cx;
@@ -2057,53 +2094,148 @@ static int spherecast_triangle( v3f tri[3],
    return hit;
 }
 
-static inline float vg_randf(void)
+/*
+ * -----------------------------------------------------------------------------
+ * Section 5.e                       Curves
+ * -----------------------------------------------------------------------------
+ */
+
+static void eval_bezier_time( v3f p0, v3f p1, v3f h0, v3f h1, f32 t, v3f p )
 {
-   /* TODO: replace with our own rand */
-   return (float)rand()/(float)(RAND_MAX);
+   f32 tt = t*t,
+         ttt = tt*t;
+   /* scalars below: expanded cubic Bernstein weights of p1, h1, h0, p0 */
+   v3_muls( p1, ttt, p );
+   v3_muladds( p, h1, 3.0f*tt  -3.0f*ttt, p );
+   v3_muladds( p, h0, 3.0f*ttt -6.0f*tt  +3.0f*t, p );
+   v3_muladds( p, p0, 3.0f*tt  -ttt -3.0f*t +1.0f, p );
 }
 
-static inline void vg_rand_dir(v3f dir)
+static void eval_bezier3( v3f p0, v3f p1, v3f p2, f32 t, v3f p )
 {
-   dir[0] = vg_randf();
-   dir[1] = vg_randf();
-   dir[2] = vg_randf();
+   f32 u = 1.0f-t;   /* weights u*u, 2ut, t*t: quadratic Bernstein terms */
 
-   v3_muls( dir, 2.0f, dir );
-   v3_sub( dir, (v3f){1.0f,1.0f,1.0f}, dir );
+   v3_muls( p0, u*u, p );
+   v3_muladds( p, p1, 2.0f*u*t, p );
+   v3_muladds( p, p2, t*t, p );
+}
 
-   v3_normalize( dir );
+/*
+ * -----------------------------------------------------------------------------
+ * Section 6.a             PRNG and some distributions
+ * -----------------------------------------------------------------------------
+ */
+
+/* An implementation of the MT19937 Algorithm for the Mersenne Twister
+ * by Evan Sultanik.  Based upon the pseudocode in: M. Matsumoto and
+ * T. Nishimura, "Mersenne Twister: A 623-dimensionally
+ * equidistributed uniform pseudorandom number generator," ACM
+ * Transactions on Modeling and Computer Simulation Vol. 8, No. 1,
+ * January pp.3-30 1998.
+ *
+ * http://www.sultanik.com/Mersenne_twister
+ * https://github.com/ESultanik/mtwister/blob/master/mtwister.c
+ */
+
+#define MT_UPPER_MASK         0x80000000
+#define MT_LOWER_MASK         0x7fffffff
+#define MT_TEMPERING_MASK_B   0x9d2c5680
+#define MT_TEMPERING_MASK_C   0xefc60000
+
+#define MT_STATE_VECTOR_LENGTH 624
+
+/* changes to STATE_VECTOR_LENGTH also require changes to this */
+#define MT_STATE_VECTOR_M      397 
+
+static struct {
+  u32 mt[MT_STATE_VECTOR_LENGTH]; /* generator state words */
+  i32 index;                      /* next word to read; LENGTH+1 = unseeded */
+}
+vg_rand = { .index = MT_STATE_VECTOR_LENGTH+1 };
+
+static void vg_rand_seed( unsigned long seed ) 
+{
+   /* Fill mt[] with the linear congruential generator from Line 25 of
+    * Table 1 in: Donald Knuth, "The Art of Computer Programming,"
+    * Vol. 2 (2nd Ed.) pp.102. index is left at the vector length so the
+    * next vg_randu32() call regenerates the whole block before reading. */
+   vg_rand.mt[0] = seed & 0xffffffff;
+   for( i32 i=1; i<MT_STATE_VECTOR_LENGTH; i++ ){
+      vg_rand.mt[i] = (6069 * vg_rand.mt[i-1]) & 0xffffffff;
+   }
+   vg_rand.index = MT_STATE_VECTOR_LENGTH;
 }
 
-static inline void vg_rand_sphere( v3f co )
+/*
+ * Returns the next pseudo-random u32, refilling mt[] once it is used up.
+ */
+static u32 vg_randu32(void) 
+{
+   u32 y;
+   /* mag[x] = x * 0x9908b0df for x = 0,1 */
+   static u32 mag[2] = {0x0, 0x9908b0df}; 
+   if( vg_rand.index >= MT_STATE_VECTOR_LENGTH || vg_rand.index < 0 ){
+      /* all words consumed: generate STATE_VECTOR_LENGTH words at a time */
+      int kk;
+      if( vg_rand.index >= MT_STATE_VECTOR_LENGTH+1 || vg_rand.index < 0 ){
+         vg_rand_seed( 4357 ); /* index past LENGTH or negative: reseed */
+      }
+      for( kk=0; kk<MT_STATE_VECTOR_LENGTH-MT_STATE_VECTOR_M; kk++ ){
+         y = (vg_rand.mt[kk] & MT_UPPER_MASK) | 
+             (vg_rand.mt[kk+1] & MT_LOWER_MASK);
+         vg_rand.mt[kk] = vg_rand.mt[kk+MT_STATE_VECTOR_M] ^ 
+                           (y >> 1) ^ mag[y & 0x1];
+      }
+      for( ; kk<MT_STATE_VECTOR_LENGTH-1; kk++ ){ /* kk+M now wraps around */
+         y = (vg_rand.mt[kk] & MT_UPPER_MASK) | 
+             (vg_rand.mt[kk+1] & MT_LOWER_MASK);
+         vg_rand.mt[kk] = 
+            vg_rand.mt[ kk+(MT_STATE_VECTOR_M-MT_STATE_VECTOR_LENGTH)] ^ 
+                        (y >> 1) ^ mag[y & 0x1];
+      }
+      y = (vg_rand.mt[MT_STATE_VECTOR_LENGTH-1] & MT_UPPER_MASK) | 
+          (vg_rand.mt[0] & MT_LOWER_MASK);
+      vg_rand.mt[MT_STATE_VECTOR_LENGTH-1] = 
+         vg_rand.mt[MT_STATE_VECTOR_M-1] ^ (y >> 1) ^ mag[y & 0x1];
+      vg_rand.index = 0; /* resume reading from the first word */
+   }
+   y = vg_rand.mt[vg_rand.index++];
+   y ^= (y >> 11); /* tempering: this and the next three lines */
+   y ^= (y << 7) & MT_TEMPERING_MASK_B;
+   y ^= (y << 15) & MT_TEMPERING_MASK_C;
+   y ^= (y >> 18);
+   return y;
+}
+
+/*
+ * Generates a pseudo-random f64 in the closed range [0..1]; 1.0 is reachable.
+ */
+static inline f64 vg_randf64(void)
 {
-   vg_rand_dir(co);
-   v3_muls( co, cbrtf( vg_randf() ), co );
+   return (f64)vg_randu32()/(f64)0xffffffff;
 }
 
-static inline int vg_randint(int max)
+static inline f64 vg_randf64_range( f64 min, f64 max )
 {
-   return rand()%max;
+   return vg_lerp( min, max, vg_randf64() ); /* blend factor in [0..1] */
 }
 
-static void eval_bezier_time( v3f p0, v3f p1, v3f h0, v3f h1, float t, v3f p )
+static inline void vg_rand_dir( v3f dir )
 {
-   float tt = t*t,
-         ttt = tt*t;
+   f32 z   = vg_randf64()*2.0 - 1.0,          /* height, uniform [-1,1] */
+       phi = vg_randf64()*6.283185307179586,  /* azimuth, [0,2pi]       */
+       r   = sqrtf( 1.0f - z*z );             /* ring radius at z       */
 
-   v3_muls( p1, ttt, p );
-   v3_muladds( p, h1, 3.0f*tt  -3.0f*ttt, p );
-   v3_muladds( p, h0, 3.0f*ttt -6.0f*tt  +3.0f*t, p );
-   v3_muladds( p, p0, 3.0f*tt  -ttt -3.0f*t +1.0f, p );
+   /* uniform on the unit sphere (normalising a cube sample is biased) */
+   dir[0] = r*cosf( phi );
+   dir[1] = r*sinf( phi );
+   dir[2] = z;
 }
 
-static void eval_bezier3( v3f p0, v3f p1, v3f p2, float t, v3f p )
+static inline void vg_rand_sphere( v3f co )
 {
-   float u = 1.0f-t;
-
-   v3_muls( p0, u*u, p );
-   v3_muladds( p, p1, 2.0f*u*t, p );
-   v3_muladds( p, p2, t*t, p );
+   vg_rand_dir(co);                           /* direction first */
+   v3_muls( co, cbrtf( vg_randf64() ), co );  /* then radius = cbrt(u) */
 }
 
 #endif /* VG_M_H */