+static inline void m4x4_mulv( m4x4f m, v4f v, v4f d )
+{
+ v4f res;
+
+ res[0] = m[0][0]*v[0] + m[1][0]*v[1] + m[2][0]*v[2] + m[3][0]*v[3];
+ res[1] = m[0][1]*v[0] + m[1][1]*v[1] + m[2][1]*v[2] + m[3][1]*v[3];
+ res[2] = m[0][2]*v[0] + m[1][2]*v[1] + m[2][2]*v[2] + m[3][2]*v[3];
+ res[3] = m[0][3]*v[0] + m[1][3]*v[1] + m[2][3]*v[2] + m[3][3]*v[3];
+
+ v4_copy( res, d );
+}
+
+static inline void m4x4_inv( m4x4f a, m4x4f d )
+{
+ float a00 = a[0][0], a01 = a[0][1], a02 = a[0][2], a03 = a[0][3],
+ a10 = a[1][0], a11 = a[1][1], a12 = a[1][2], a13 = a[1][3],
+ a20 = a[2][0], a21 = a[2][1], a22 = a[2][2], a23 = a[2][3],
+ a30 = a[3][0], a31 = a[3][1], a32 = a[3][2], a33 = a[3][3],
+ det,
+ t[6];
+
+ t[0] = a22*a33 - a32*a23;
+ t[1] = a21*a33 - a31*a23;
+ t[2] = a21*a32 - a31*a22;
+ t[3] = a20*a33 - a30*a23;
+ t[4] = a20*a32 - a30*a22;
+ t[5] = a20*a31 - a30*a21;
+
+ d[0][0] = a11*t[0] - a12*t[1] + a13*t[2];
+ d[1][0] =-(a10*t[0] - a12*t[3] + a13*t[4]);
+ d[2][0] = a10*t[1] - a11*t[3] + a13*t[5];
+ d[3][0] =-(a10*t[2] - a11*t[4] + a12*t[5]);
+
+ d[0][1] =-(a01*t[0] - a02*t[1] + a03*t[2]);
+ d[1][1] = a00*t[0] - a02*t[3] + a03*t[4];
+ d[2][1] =-(a00*t[1] - a01*t[3] + a03*t[5]);
+ d[3][1] = a00*t[2] - a01*t[4] + a02*t[5];
+
+ t[0] = a12*a33 - a32*a13;
+ t[1] = a11*a33 - a31*a13;
+ t[2] = a11*a32 - a31*a12;
+ t[3] = a10*a33 - a30*a13;
+ t[4] = a10*a32 - a30*a12;
+ t[5] = a10*a31 - a30*a11;
+
+ d[0][2] = a01*t[0] - a02*t[1] + a03*t[2];
+ d[1][2] =-(a00*t[0] - a02*t[3] + a03*t[4]);
+ d[2][2] = a00*t[1] - a01*t[3] + a03*t[5];
+ d[3][2] =-(a00*t[2] - a01*t[4] + a02*t[5]);
+
+ t[0] = a12*a23 - a22*a13;
+ t[1] = a11*a23 - a21*a13;
+ t[2] = a11*a22 - a21*a12;
+ t[3] = a10*a23 - a20*a13;
+ t[4] = a10*a22 - a20*a12;
+ t[5] = a10*a21 - a20*a11;
+
+ d[0][3] =-(a01*t[0] - a02*t[1] + a03*t[2]);
+ d[1][3] = a00*t[0] - a02*t[3] + a03*t[4];
+ d[2][3] =-(a00*t[1] - a01*t[3] + a03*t[5]);
+ d[3][3] = a00*t[2] - a01*t[4] + a02*t[5];
+
+ det = 1.0f / (a00*d[0][0] + a01*d[1][0] + a02*d[2][0] + a03*d[3][0]);
+ v4_muls( d[0], det, d[0] );
+ v4_muls( d[1], det, d[1] );
+ v4_muls( d[2], det, d[2] );
+ v4_muls( d[3], det, d[3] );
+}
+