+ m4x3_expand_aabb_point( m, box, (v3f){ b[0], a[1], b[2] } );
+}
+
+/*
+ * Slab-style overlap test between a ray and an axis-aligned box.
+ *
+ * box:  box[0] and box[1] are the two points the slabs are built from
+ * co:   ray origin
+ * dir:  ray direction; every component is handed to v3_div as a divisor, and
+ *       zero components get no special treatment in this function
+ * dist: limit that the entry parameter must stay below
+ *
+ * Returns non-zero when the exit parameter is not before the entry parameter,
+ * the entry parameter is below dist, and the exit parameter is positive.
+ */
+int ray_aabb( boxf box, v3f co, v3f dir, float dist )
+{
+   v3f t_lo, t_hi;
+
+   /* per-axis ray parameters for each of the two box points */
+   v3_sub( box[0], co, t_lo );
+   v3_div( t_lo, dir, t_lo );
+   v3_sub( box[1], co, t_hi );
+   v3_div( t_hi, dir, t_hi );
+
+   float t_enter = vg_minf( t_lo[0], t_hi[0] ),
+         t_exit  = vg_maxf( t_lo[0], t_hi[0] );
+
+   /* narrow the interval with the remaining two axes */
+   for( int axis=1; axis<3; axis++ )
+   {
+      t_enter = vg_maxf( t_enter, vg_minf( t_lo[axis], t_hi[axis] ) );
+      t_exit  = vg_minf( t_exit,  vg_maxf( t_lo[axis], t_hi[axis] ) );
+   }
+
+   return (t_exit >= t_enter) && (t_enter < dist) && (t_exit > 0);
+}
+
+/*
+ * Build a frame in m that sits at pos with its third axis (m[2]) pointing
+ * from pos towards target.
+ *
+ * m[0] is the normalized cross product of up and the view direction, m[1] is
+ * the cross product of the view direction and m[0], and m[3] receives pos.
+ * No handling is present for pos == target or for up parallel to the view
+ * direction.
+ */
+static inline void m4x3_lookat( m4x3f m, v3f pos, v3f target, v3f up )
+{
+   v3f forward;
+
+   v3_sub( target, pos, forward );
+   v3_normalize( forward );
+   v3_copy( forward, m[2] );
+
+   /* side axis from the up hint and the view direction */
+   v3_cross( up, forward, m[0] );
+   v3_normalize( m[0] );
+
+   /* remaining axis; not renormalized here */
+   v3_cross( forward, m[0], m[1] );
+
+   v3_copy( pos, m[3] );
+}
+
+/*
+ * Matrix 4x4
+ */
+
+/* Brace initializer for a 4x4 float matrix: 1.0f on the diagonal, 0.0f elsewhere */
+#define M4X4_IDENTITY {{1.0f, 0.0f, 0.0f, 0.0f },\
+ { 0.0f, 1.0f, 0.0f, 0.0f },\
+ { 0.0f, 0.0f, 1.0f, 0.0f },\
+ { 0.0f, 0.0f, 0.0f, 1.0f }}
+/* Brace initializer for a 4x4 float matrix with every element 0.0f */
+#define M4X4_ZERO {{0.0f, 0.0f, 0.0f, 0.0f },\
+ { 0.0f, 0.0f, 0.0f, 0.0f },\
+ { 0.0f, 0.0f, 0.0f, 0.0f },\
+ { 0.0f, 0.0f, 0.0f, 0.0f }}
+
+/*
+ * Fill m with a perspective frustum matrix.
+ *
+ * angle:  full opening angle for the top/bottom extent; it is halved and
+ *         scaled by VG_PIf / 180 before tanf, i.e. treated as degrees
+ * ratio:  factor applied to the top extent to obtain the right extent
+ * fnear:  near plane distance
+ * ffar:   far plane distance; ffar == fnear divides by zero below
+ *
+ * All sixteen elements of m are written.
+ */
+static void m4x4_projection( m4x4f m, float angle,
+                             float ratio, float fnear, float ffar )
+{
+   float top    = tanf( angle * 0.5f * VG_PIf / 180.0f ) * fnear,
+         right  = ratio * top,
+         left   = -right,
+         bottom = -top;
+
+   /* start from all zeroes, then fill in the entries that carry values */
+   for( int col=0; col<4; col++ )
+      for( int row=0; row<4; row++ )
+         m[col][row] = 0.0f;
+
+   m[0][0] = 2.0f * fnear / (right - left);
+   m[1][1] = 2.0f * fnear / (top - bottom);
+
+   /* the frustum is symmetric, so these two come out as zero for non-zero extents */
+   m[2][0] = (right + left) / (right - left);
+   m[2][1] = (top + bottom) / (top - bottom);
+
+   m[2][2] = -(ffar + fnear) / (ffar - fnear);
+   m[2][3] = -1.0f;
+   m[3][2] = -2.0f * ffar * fnear / (ffar - fnear);
+}
+
+/*
+ * Update m[3] in place: for each of the first three columns of m, call
+ * v4_muladds with m[3], that column and the matching component of v, storing
+ * the result back into m[3]. Columns m[0..2] are only read.
+ * (presumably m[3] += m[i]*v[i] -- confirm against v4_muladds)
+ */
+static void m4x4_translate( m4x4f m, v3f v )
+{
+   for( int axis=0; axis<3; axis++ )
+      v4_muladds( m[3], m[axis], v[axis], m[3] );
+}
+
+/*
+ * Copy the four v4 columns of a into b, one v4_copy per column
+ * (a is the source, b the destination).
+ */
+static inline void m4x4_copy( m4x4f a, m4x4f b )
+{
+   for( int col=0; col<4; col++ )
+      v4_copy( a[col], b[col] );
+}
+
+/*
+ * Overwrite a with the identity matrix: 1.0f where the two indices match,
+ * 0.0f everywhere else.
+ */
+static inline void m4x4_identity( m4x4f a )
+{
+   for( int col=0; col<4; col++ )
+      for( int row=0; row<4; row++ )
+         a[col][row] = (col == row)? 1.0f: 0.0f;
+}
+
+/*
+ * Overwrite all sixteen elements of a with 0.0f.
+ */
+static inline void m4x4_zero( m4x4f a )
+{
+   for( int col=0; col<4; col++ )
+      for( int row=0; row<4; row++ )
+         a[col][row] = 0.0f;
+}
+
+/*
+ * Multiply two 4x4 matrices and store the product in d:
+ *
+ *    d[c][r] = a[0][r]*b[c][0] + a[1][r]*b[c][1]
+ *            + a[2][r]*b[c][2] + a[3][r]*b[c][3]
+ *
+ * The product is assembled in a temporary and copied out afterwards, so d may
+ * be the same matrix as a or b.
+ */
+static inline void m4x4_mul( m4x4f a, m4x4f b, m4x4f d )
+{
+   m4x4f res;
+
+   for( int c=0; c<4; c++ )
+   {
+      for( int r=0; r<4; r++ )
+      {
+         res[c][r] = a[0][r]*b[c][0] + a[1][r]*b[c][1] +
+                     a[2][r]*b[c][2] + a[3][r]*b[c][3];
+      }
+   }
+
+   m4x4_copy( res, d );
+}
+
+/*
+ * Transform the 4-vector v by m and store the result in d:
+ *
+ *    d[i] = m[0][i]*v[0] + m[1][i]*v[1] + m[2][i]*v[2] + m[3][i]*v[3]
+ *
+ * The result is built in a temporary first, so d may be the same vector as v.
+ */
+static inline void m4x4_mulv( m4x4f m, v4f v, v4f d )
+{
+   v4f res;
+
+   for( int i=0; i<4; i++ )
+   {
+      res[i] = m[0][i]*v[0] + m[1][i]*v[1] +
+               m[2][i]*v[2] + m[3][i]*v[3];
+   }
+
+   v4_copy( res, d );
+}
+
+static inline void m4x4_inv( m4x4f a, m4x4f d )
+{
+ float a00 = a[0][0], a01 = a[0][1], a02 = a[0][2], a03 = a[0][3],
+ a10 = a[1][0], a11 = a[1][1], a12 = a[1][2], a13 = a[1][3],
+ a20 = a[2][0], a21 = a[2][1], a22 = a[2][2], a23 = a[2][3],
+ a30 = a[3][0], a31 = a[3][1], a32 = a[3][2], a33 = a[3][3],
+ det,
+ t[6];
+
+ t[0] = a22*a33 - a32*a23;
+ t[1] = a21*a33 - a31*a23;
+ t[2] = a21*a32 - a31*a22;
+ t[3] = a20*a33 - a30*a23;
+ t[4] = a20*a32 - a30*a22;
+ t[5] = a20*a31 - a30*a21;
+
+ d[0][0] = a11*t[0] - a12*t[1] + a13*t[2];
+ d[1][0] =-(a10*t[0] - a12*t[3] + a13*t[4]);
+ d[2][0] = a10*t[1] - a11*t[3] + a13*t[5];
+ d[3][0] =-(a10*t[2] - a11*t[4] + a12*t[5]);
+
+ d[0][1] =-(a01*t[0] - a02*t[1] + a03*t[2]);
+ d[1][1] = a00*t[0] - a02*t[3] + a03*t[4];
+ d[2][1] =-(a00*t[1] - a01*t[3] + a03*t[5]);
+ d[3][1] = a00*t[2] - a01*t[4] + a02*t[5];
+
+ t[0] = a12*a33 - a32*a13;
+ t[1] = a11*a33 - a31*a13;
+ t[2] = a11*a32 - a31*a12;
+ t[3] = a10*a33 - a30*a13;
+ t[4] = a10*a32 - a30*a12;
+ t[5] = a10*a31 - a30*a11;
+
+ d[0][2] = a01*t[0] - a02*t[1] + a03*t[2];
+ d[1][2] =-(a00*t[0] - a02*t[3] + a03*t[4]);
+ d[2][2] = a00*t[1] - a01*t[3] + a03*t[5];
+ d[3][2] =-(a00*t[2] - a01*t[4] + a02*t[5]);
+
+ t[0] = a12*a23 - a22*a13;
+ t[1] = a11*a23 - a21*a13;
+ t[2] = a11*a22 - a21*a12;
+ t[3] = a10*a23 - a20*a13;
+ t[4] = a10*a22 - a20*a12;
+ t[5] = a10*a21 - a20*a11;
+
+ d[0][3] =-(a01*t[0] - a02*t[1] + a03*t[2]);
+ d[1][3] = a00*t[0] - a02*t[3] + a03*t[4];
+ d[2][3] =-(a00*t[1] - a01*t[3] + a03*t[5]);
+ d[3][3] = a00*t[2] - a01*t[4] + a02*t[5];
+
+ det = 1.0f / (a00*d[0][0] + a01*d[1][0] + a02*d[2][0] + a03*d[3][0]);
+ v4_muls( d[0], det, d[0] );
+ v4_muls( d[1], det, d[1] );
+ v4_muls( d[2], det, d[2] );
+ v4_muls( d[3], det, d[3] );