//
// Created by Eugeny Grishul
//
// See license at http://bamelg.com/license.txt
//
using System.Runtime;
using System.Runtime.InteropServices;
namespace System.Numerics {
[Alignment( Boundary = 16 )]
public struct Matrix4x4 {
public float[4, 4] Value;
public Matrix4x4() { }
public Matrix4x4( float[16]& values ) { Memory.Copy( Value, values, sizeof( Value ) ); }
public Matrix4x4( float[4, 4]& values ) { Memory.Copy( Value, values, sizeof( values ) ); }
public static readonly Matrix4x4 Identity = new Matrix4x4 {
Value = new float[4, 4] {
{ 1, 0, 0, 0 },
{ 0, 1, 0, 0 },
{ 0, 0, 1, 0 },
{ 0, 0, 0, 1 }
}
};
public static Vector4 operator *( Vector4& vector, Matrix4x4& matrix ) {
return new Vector4( vector.X * matrix.Value[0, 0] + vector.Y * matrix.Value[0, 1] + vector.Z * matrix.Value[0, 2] + vector.W * matrix.Value[0, 3], vector.X * matrix.Value[1, 0] + vector.Y * matrix.Value[1, 1] + vector.Z * matrix.Value[1, 2] + vector.W * matrix.Value[1, 3], vector.X * matrix.Value[2, 0] + vector.Y * matrix.Value[2, 1] + vector.Z * matrix.Value[2, 2] + vector.W * matrix.Value[2, 3], vector.X * matrix.Value[3, 0] + vector.Y * matrix.Value[3, 1] + vector.Z * matrix.Value[3, 2] + vector.W * matrix.Value[3, 3] );
}
internal const CallingConvention DefaultCallingConvention = CallingConvention.Default;
// internal const CallingConvention DefaultCallingConvention = Environment.CurrentCpu == System.Runtime.CompilerServices.CpuID.X86_32 ? CallingConvention.FastCallX86 : CallingConvention.Default;
[CallingConvention( Value = DefaultCallingConvention )]
public static Vector4 operator *( Matrix4x4& matrix, Vector4& vector ) asm {
X86_32 {
feature( SSE2 ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - matrix
// edx - vector
mov eax, [esp + 4] // result
}
else {
mov ecx, [esp + 4] // matrix
mov edx, [esp + 8] // vector
mov eax, [esp + 12] // result
}
movaps xmm4, [edx]
movaps xmm0, [ecx]
movaps xmm1, [ecx + 16]
movaps xmm2, [ecx + 32]
movaps xmm3, [ecx + 48]
pshufd xmm5, xmm4, 0b01010101
pshufd xmm6, xmm4, 0b10101010
pshufd xmm7, xmm4, 0b11111111
pshufd xmm4, xmm4, 0b00000000
mulps xmm0, xmm4
mulps xmm1, xmm5
mulps xmm2, xmm6
mulps xmm3, xmm7
addps xmm0, xmm1
addps xmm2, xmm3
addps xmm0, xmm2
movaps [eax], xmm0
retn
}
feature( SSE1 ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - matrix
// edx - vector
mov eax, [esp + 4] // result
}
else {
mov ecx, [esp + 4] // matrix
mov edx, [esp + 8] // vector
mov eax, [esp + 12] // result
}
movaps xmm4, [edx]
movaps xmm0, [ecx]
movaps xmm1, [ecx + 16]
movaps xmm2, [ecx + 32]
movaps xmm3, [ecx + 48]
movaps xmm5, xmm4
movaps xmm6, xmm4
movaps xmm7, xmm4
shufps xmm4, xmm4, 0b00000000
shufps xmm5, xmm5, 0b01010101
shufps xmm6, xmm6, 0b10101010
shufps xmm7, xmm7, 0b11111111
mulps xmm0, xmm4
mulps xmm1, xmm5
mulps xmm2, xmm6
mulps xmm3, xmm7
addps xmm0, xmm1
addps xmm2, xmm3
addps xmm0, xmm2
movaps [eax], xmm0
retn
}
feature( AMD3DNow ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - matrix
// edx - vector
mov eax, [esp + 4] // result
}
else {
mov ecx, [esp + 4] // matrix
mov edx, [esp + 8] // vector
mov eax, [esp + 12] // result
}
pshufw mm4, [edx], 0b01000100
pshufw mm5, [edx], 0b11101110
pshufw mm6, [edx + 8], 0b01000100
pshufw mm7, [edx + 8], 0b11101110
movq mm0, [ecx]
movq mm1, [ecx + 16]
movq mm2, [ecx + 32]
movq mm3, [ecx + 48]
pfmul mm0, mm4
pfmul mm1, mm5
pfmul mm2, mm6
pfmul mm3, mm7
pfadd mm0, mm1
pfadd mm2, mm3
pfadd mm0, mm2
movq [eax], mm0
movq mm0, [ecx + 8]
movq mm1, [ecx + 24]
movq mm2, [ecx + 40]
movq mm3, [ecx + 56]
pfmul mm0, mm4
pfmul mm1, mm5
pfmul mm2, mm6
pfmul mm3, mm7
pfadd mm0, mm1
pfadd mm2, mm3
pfadd mm0, mm2
movq [eax + 8], mm0
femms
retn
}
default {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - matrix
// edx - vector
mov eax, [esp + 4] // result
}
else {
mov ecx, [esp + 4] // matrix
mov edx, [esp + 8] // vector
mov eax, [esp + 12] // result
}
fld dword ptr [ecx]
fld dword ptr [ecx + 4]
fld dword ptr [ecx + 8]
fld dword ptr [ecx + 12]
fld dword ptr [edx]
fmul st4, st0
fmul st3, st0
fmul st2, st0
fmulp st1, st0
fld dword ptr [ecx + 16]
fld dword ptr [ecx + 20]
fld dword ptr [ecx + 24]
fld dword ptr [edx + 4]
fmul st3, st0
fmul st2, st0
fmul st1, st0
fmul dword ptr [ecx + 28]
faddp st4, st0
faddp st4, st0
faddp st4, st0
faddp st4, st0
fld dword ptr [ecx + 32]
fld dword ptr [ecx + 36]
fld dword ptr [ecx + 40]
fld dword ptr [edx + 8]
fmul st3, st0
fmul st2, st0
fmul st1, st0
fmul dword ptr [ecx + 44]
faddp st4, st0
faddp st4, st0
faddp st4, st0
faddp st4, st0
fld dword ptr [ecx + 48]
fld dword ptr [ecx + 52]
fld dword ptr [ecx + 56]
fld dword ptr [edx + 12]
fmul st3, st0
fmul st2, st0
fmul st1, st0
fmul dword ptr [ecx + 60]
faddp st4, st0
faddp st4, st0
faddp st4, st0
faddp st4, st0
fstp dword ptr [eax + 12]
fstp dword ptr [eax + 8]
fstp dword ptr [eax + 4]
fstp dword ptr [eax]
retn
}
}
default {
return new Vector4( vector.X * matrix.Value[0, 0] + vector.Y * matrix.Value[1, 0] + vector.Z * matrix.Value[2, 0] + vector.W * matrix.Value[3, 0], vector.X * matrix.Value[0, 1] + vector.Y * matrix.Value[1, 1] + vector.Z * matrix.Value[2, 1] + vector.W * matrix.Value[3, 1], vector.X * matrix.Value[0, 2] + vector.Y * matrix.Value[1, 2] + vector.Z * matrix.Value[2, 2] + vector.W * matrix.Value[3, 2], vector.X * matrix.Value[0, 3] + vector.Y * matrix.Value[1, 3] + vector.Z * matrix.Value[2, 3] + vector.W * matrix.Value[3, 3] );
}
}
public Matrix4x4& Inverse() {
this = Inversed;
return this;
}
[CallingConvention( Value = DefaultCallingConvention )]
public Matrix4x4& Transpose() asm {
X86_32 {
feature( SSE1 ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - this
}
else {
mov ecx, [esp + 4] // this
}
movaps xmm0, [ecx]
movaps xmm2, [ecx + 32]
movaps xmm1, xmm0
movaps xmm3, xmm2
unpcklps xmm0, [ecx + 16]
unpckhps xmm1, [ecx + 16]
unpcklps xmm2, [ecx + 48]
unpckhps xmm3, [ecx + 48]
movaps xmm4, xmm0
movaps xmm6, xmm1
shufps xmm0, xmm2, 0b01000100
shufps xmm4, xmm2, 0b11101110
shufps xmm1, xmm3, 0b01000100
shufps xmm6, xmm3, 0b11101110
movaps [ecx], xmm0
movaps [ecx + 16], xmm4
movaps [ecx + 32], xmm1
movaps [ecx + 48], xmm6
retn
}
feature( MMX ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - this
}
else {
mov ecx, [esp + 4] // this
}
movq mm0, [ecx]
movq mm1, [ecx + 8]
movq mm4, [ecx + 32]
movq mm5, [ecx + 40]
movq mm2, mm0
movq mm3, mm1
movq mm6, mm4
movq mm7, mm5
punpckldq mm0, [ecx + 16]
punpckhdq mm2, [ecx + 16]
punpckldq mm1, [ecx + 24]
punpckhdq mm3, [ecx + 24]
punpckldq mm4, [ecx + 48]
punpckhdq mm6, [ecx + 48]
punpckldq mm5, [ecx + 56]
punpckhdq mm7, [ecx + 56]
movq [ecx], mm0
movq [ecx + 8], mm4
movq [ecx + 16], mm2
movq [ecx + 24], mm6
movq [ecx + 32], mm1
movq [ecx + 40], mm5
movq [ecx + 48], mm3
movq [ecx + 56], mm7
emms
retn
}
default {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - this
}
else {
mov ecx, [esp + 4] // this
}
fld dword ptr [ecx + 4]
fld dword ptr [ecx + 8]
fld dword ptr [ecx + 12]
fld dword ptr [ecx + 16]
fld dword ptr [ecx + 32]
fld dword ptr [ecx + 48]
fstp dword ptr [edx + 12]
fstp dword ptr [edx + 8]
fstp dword ptr [edx + 4]
fstp dword ptr [edx + 48]
fstp dword ptr [edx + 32]
fstp dword ptr [edx + 16]
fld dword ptr [ecx + 24]
fld dword ptr [ecx + 28]
fld dword ptr [ecx + 44]
fld dword ptr [ecx + 36]
fld dword ptr [ecx + 52]
fld dword ptr [ecx + 56]
fstp dword ptr [edx + 44]
fstp dword ptr [edx + 28]
fstp dword ptr [edx + 24]
fstp dword ptr [edx + 56]
fstp dword ptr [edx + 52]
fstp dword ptr [edx + 36]
retn
}
}
default {
this = Transposed;
return this;
}
}
public Matrix4x4 Transposed {
[CallingConvention( Value = DefaultCallingConvention )]
get asm {
X86_32 {
feature( SSE1 ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - this
// edx - result
}
else {
mov ecx, [esp + 4] // this
mov edx, [esp + 8] // result
}
movups xmm0, [ecx]
movups xmm2, [ecx + 32]
movups xmm1, xmm0
movups xmm3, xmm2
unpcklps xmm0, [ecx + 16]
unpckhps xmm1, [ecx + 16]
unpcklps xmm2, [ecx + 48]
unpckhps xmm3, [ecx + 48]
movaps xmm4, xmm0
movaps xmm6, xmm1
shufps xmm0, xmm2, 0b01000100
shufps xmm4, xmm2, 0b11101110
shufps xmm1, xmm3, 0b01000100
shufps xmm6, xmm3, 0b11101110
movups [edx], xmm0
movups [edx + 16], xmm4
movups [edx + 32], xmm1
movups [edx + 48], xmm6
retn
}
feature( MMX ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - this
// edx - result
}
else {
mov ecx, [esp + 4] // this
mov edx, [esp + 8] // result
}
movq mm0, [ecx]
movq mm1, [ecx + 8]
movq mm4, [ecx + 32]
movq mm5, [ecx + 40]
movq mm2, mm0
movq mm3, mm1
movq mm6, mm4
movq mm7, mm5
punpckldq mm0, [ecx + 16]
punpckhdq mm2, [ecx + 16]
punpckldq mm1, [ecx + 24]
punpckhdq mm3, [ecx + 24]
punpckldq mm4, [ecx + 48]
punpckhdq mm6, [ecx + 48]
punpckldq mm5, [ecx + 56]
punpckhdq mm7, [ecx + 56]
movq [edx], mm0
movq [edx + 8], mm4
movq [edx + 16], mm2
movq [edx + 24], mm6
movq [edx + 32], mm1
movq [edx + 40], mm5
movq [edx + 48], mm3
movq [edx + 56], mm7
emms
retn
}
default {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - this
// edx - result
}
else {
mov ecx, [esp + 4] // this
mov edx, [esp + 8] // result
}
fld dword ptr [ecx + 4]
fld dword ptr [ecx + 8]
fld dword ptr [ecx + 12]
fld dword ptr [ecx + 16]
fld dword ptr [ecx + 32]
fld dword ptr [ecx + 48]
fstp dword ptr [edx + 12]
fstp dword ptr [edx + 8]
fstp dword ptr [edx + 4]
fstp dword ptr [edx + 48]
fstp dword ptr [edx + 32]
fstp dword ptr [edx + 16]
fld dword ptr [ecx + 24]
fld dword ptr [ecx + 28]
fld dword ptr [ecx + 44]
fld dword ptr [ecx + 36]
fld dword ptr [ecx + 52]
fld dword ptr [ecx + 56]
fstp dword ptr [edx + 44]
fstp dword ptr [edx + 28]
fstp dword ptr [edx + 24]
fstp dword ptr [edx + 56]
fstp dword ptr [edx + 52]
fstp dword ptr [edx + 36]
cmp ecx, edx
jz ResultAndSourceSame
fld dword ptr [ecx]
fld dword ptr [ecx + 20]
fld dword ptr [ecx + 40]
fld dword ptr [ecx + 60]
fstp dword ptr [edx + 60]
fstp dword ptr [edx + 40]
fstp dword ptr [edx + 20]
fstp dword ptr [edx]
ResultAndSourceSame:
retn
}
}
default {
Matrix4x4 result;
[Unroll]
for( var i = 0; i < 4; ++i )
[Unroll]
for( var j = 0; j < 4; ++j )
result.Value[i, j] = Value[j, i];
return result;
}
}
}
public Matrix4x4 Inversed {
[CallingConvention( Value = DefaultCallingConvention )]
get asm {
X86_32 {
feature( SSE1 ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - this
// edx - result
}
else {
mov ecx, [esp + 4] // this
mov edx, [esp + 8] // result
}
mov eax, esp
and esp, -16
sub esp, 0xB0
movlps xmm2, [ecx + 8]
movlps xmm4, [ecx + 40]
movhps xmm2, [ecx + 24]
movhps xmm4, [ecx + 56]
movlps xmm3, [ecx + 32]
movlps xmm1, [ecx]
movhps xmm3, [ecx + 48]
movhps xmm1, [ecx + 16]
movaps xmm5, xmm2
shufps xmm5, xmm4, 0b10001000
shufps xmm4, xmm2, 0b11011101
movaps xmm2, xmm4
mulps xmm2, xmm5
shufps xmm2, xmm2, 0b10110001
movaps xmm6, xmm2
shufps xmm6, xmm6, 0b01001110
movaps xmm7, xmm3
shufps xmm3, xmm1, 0b11011101
shufps xmm1, xmm7, 0b10001000
movaps xmm7, xmm3
mulps xmm3, xmm6
mulps xmm6, xmm1
movaps xmm0, xmm6
movaps xmm6, xmm7
mulps xmm7, xmm2
mulps xmm2, xmm1
subps xmm3, xmm7
movaps xmm7, xmm6
mulps xmm7, xmm5
shufps xmm5, xmm5, 0b01001110
shufps xmm7, xmm7, 0b10110001
movaps [esp + 16], xmm2
movaps xmm2, xmm4
mulps xmm2, xmm7
addps xmm2, xmm3
movaps xmm3, xmm7
shufps xmm7, xmm7, 0b01001110
mulps xmm3, xmm1
movaps [esp + 32], xmm3
movaps xmm3, xmm4
mulps xmm3, xmm7
mulps xmm7, xmm1
subps xmm2, xmm3
movaps xmm3, xmm6
shufps xmm3, xmm3, 0b01001110
mulps xmm3, xmm4
shufps xmm3, xmm3, 0b10110001
movaps [esp + 48], xmm7
movaps xmm7, xmm5
mulps xmm5, xmm3
addps xmm5, xmm2
movaps xmm2, xmm3
shufps xmm3, xmm3, 0b01001110
mulps xmm2, xmm1
movaps [esp + 64], xmm4
movaps xmm4, xmm7
mulps xmm7, xmm3
mulps xmm3, xmm1
subps xmm5, xmm7
subps xmm3, xmm2
movaps xmm2, xmm1
mulps xmm1, xmm5
shufps xmm3, xmm3, 0b01001110
movaps xmm7, xmm1
shufps xmm1, xmm1, 0b01001110
movaps [esp], xmm5
addps xmm1, xmm7
movaps xmm5, xmm1
shufps xmm1, xmm1, 0b10110001
addss xmm1, xmm5
movaps xmm5, xmm6
mulps xmm5, xmm2
shufps xmm5, xmm5, 0b10110001
movaps xmm7, xmm5
shufps xmm5, xmm5, 0b01001110
movaps [esp + 80], xmm4
movaps xmm4, [esp + 64]
movaps [esp + 64], xmm6
movaps xmm6, xmm4
mulps xmm6, xmm7
addps xmm6, xmm3
movaps xmm3, xmm4
mulps xmm3, xmm5
subps xmm3, xmm6
movaps xmm6, xmm4
mulps xmm6, xmm2
shufps xmm6, xmm6, 0b10110001
movaps [esp + 112], xmm5
movaps xmm5, [esp + 64]
movaps [esp + 128], xmm7
movaps xmm7, xmm6
mulps xmm7, xmm5
addps xmm7, xmm3
movaps xmm3, xmm6
shufps xmm3, xmm3, 0b01001110
movaps [esp + 144], xmm4
movaps xmm4, xmm5
mulps xmm5, xmm3
movaps [esp + 160], xmm4
movaps xmm4, xmm6
movaps xmm6, xmm7
subps xmm6, xmm5
movaps xmm5, xmm0
movaps xmm7, [esp + 16]
subps xmm5, xmm7
shufps xmm5, xmm5, 0b01001110
movaps xmm7, [esp + 80]
mulps xmm4, xmm7
mulps xmm3, xmm7
subps xmm5, xmm4
mulps xmm2, xmm7
addps xmm3, xmm5
shufps xmm2, xmm2, 0b10110001
movaps xmm4, xmm2
shufps xmm4, xmm4, 0b01001110
movaps xmm5, [esp + 144]
movaps xmm0, xmm6
movaps xmm6, xmm5
mulps xmm5, xmm2
mulps xmm6, xmm4
addps xmm5, xmm3
movaps xmm3, xmm4
movaps xmm4, xmm5
subps xmm4, xmm6
movaps xmm5, [esp + 48]
movaps xmm6, [esp + 32]
subps xmm5, xmm6
shufps xmm5, xmm5, 0b01001110
movaps xmm6, [esp + 128]
mulps xmm6, xmm7
subps xmm6, xmm5
movaps xmm5, [esp + 112]
mulps xmm7, xmm5
subps xmm6, xmm7
movaps xmm5, [esp + 160]
mulps xmm2, xmm5
mulps xmm5, xmm3
subps xmm6, xmm2
movaps xmm2, xmm5
addps xmm2, xmm6
movaps xmm6, xmm0
movaps xmm0, xmm1
movaps xmm1, [esp]
movaps xmm3, xmm0
rcpss xmm5, xmm0
mulss xmm0, xmm5
mulss xmm0, xmm5
addss xmm5, xmm5
subss xmm5, xmm0
movaps xmm0, xmm5
addss xmm5, xmm5
mulss xmm0, xmm0
mulss xmm3, xmm0
subss xmm5, xmm3
shufps xmm5, xmm5, 0
mulps xmm1, xmm5
mulps xmm4, xmm5
mulps xmm6, xmm5
mulps xmm5, xmm2
movaps [edx], xmm1
movaps [edx + 16], xmm4
movaps [edx + 32], xmm6
movaps [edx + 48], xmm5
mov esp, eax
retn
}
}
default {
Matrix4x4 result;
float[12] pairs;
pairs[0] = Value[2, 2] * Value[3, 3];
pairs[1] = Value[3, 2] * Value[2, 3];
pairs[2] = Value[1, 2] * Value[3, 3];
pairs[3] = Value[3, 2] * Value[1, 3];
pairs[4] = Value[1, 2] * Value[2, 3];
pairs[5] = Value[2, 2] * Value[1, 3];
pairs[6] = Value[0, 2] * Value[3, 3];
pairs[7] = Value[3, 2] * Value[0, 3];
pairs[8] = Value[0, 2] * Value[2, 3];
pairs[9] = Value[2, 2] * Value[0, 3];
pairs[10] = Value[0, 2] * Value[1, 3];
pairs[11] = Value[1, 2] * Value[0, 3];
result.Value[0, 0] = ( pairs[0] * Value[1, 1] + pairs[3] * Value[2, 1] + pairs[4] * Value[3, 1] ) - ( pairs[1] * Value[1, 1] + pairs[2] * Value[2, 1] + pairs[5] * Value[3, 1] );
result.Value[0, 1] = ( pairs[1] * Value[0, 1] + pairs[6] * Value[2, 1] + pairs[9] * Value[3, 1] ) - ( pairs[0] * Value[0, 1] + pairs[7] * Value[2, 1] + pairs[8] * Value[3, 1] );
result.Value[0, 2] = ( pairs[2] * Value[0, 1] + pairs[7] * Value[1, 1] + pairs[10] * Value[3, 1] ) - ( pairs[3] * Value[0, 1] + pairs[6] * Value[1, 1] + pairs[11] * Value[3, 1] );
result.Value[0, 3] = ( pairs[5] * Value[0, 1] + pairs[8] * Value[1, 1] + pairs[11] * Value[2, 1] ) - ( pairs[4] * Value[0, 1] + pairs[9] * Value[1, 1] + pairs[10] * Value[2, 1] );
result.Value[1, 0] = ( pairs[1] * Value[1, 0] + pairs[2] * Value[2, 0] + pairs[5] * Value[3, 0] ) - ( pairs[0] * Value[1, 0] + pairs[3] * Value[2, 0] + pairs[4] * Value[3, 0] );
result.Value[1, 1] = ( pairs[0] * Value[0, 0] + pairs[7] * Value[2, 0] + pairs[8] * Value[3, 0] ) - ( pairs[1] * Value[0, 0] + pairs[6] * Value[2, 0] + pairs[9] * Value[3, 0] );
result.Value[1, 2] = ( pairs[3] * Value[0, 0] + pairs[6] * Value[1, 0] + pairs[11] * Value[3, 0] ) - ( pairs[2] * Value[0, 0] + pairs[7] * Value[1, 0] + pairs[10] * Value[3, 0] );
result.Value[1, 3] = ( pairs[4] * Value[0, 0] + pairs[9] * Value[1, 0] + pairs[10] * Value[2, 0] ) - ( pairs[5] * Value[0, 0] + pairs[8] * Value[1, 0] + pairs[11] * Value[2, 0] );
pairs[0] = Value[2, 0] * Value[3, 1];
pairs[1] = Value[3, 0] * Value[2, 1];
pairs[2] = Value[1, 0] * Value[3, 1];
pairs[3] = Value[3, 0] * Value[1, 1];
pairs[4] = Value[1, 0] * Value[2, 1];
pairs[5] = Value[2, 0] * Value[1, 1];
pairs[6] = Value[0, 0] * Value[3, 1];
pairs[7] = Value[3, 0] * Value[0, 1];
pairs[8] = Value[0, 0] * Value[2, 1];
pairs[9] = Value[2, 0] * Value[0, 1];
pairs[10] = Value[0, 0] * Value[1, 1];
pairs[11] = Value[1, 0] * Value[0, 1];
result.Value[2, 0] = ( pairs[0] * Value[1, 3] + pairs[3] * Value[2, 3] + pairs[4] * Value[3, 3] ) - ( pairs[1] * Value[1, 3] + pairs[2] * Value[2, 3] + pairs[5] * Value[3, 3] );
result.Value[2, 1] = ( pairs[1] * Value[0, 3] + pairs[6] * Value[2, 3] + pairs[9] * Value[3, 3] ) - ( pairs[0] * Value[0, 3] + pairs[7] * Value[2, 3] + pairs[8] * Value[3, 3] );
result.Value[2, 2] = ( pairs[2] * Value[0, 3] + pairs[7] * Value[1, 3] + pairs[10] * Value[3, 3] ) - ( pairs[3] * Value[0, 3] + pairs[6] * Value[1, 3] + pairs[11] * Value[3, 3] );
result.Value[2, 3] = ( pairs[5] * Value[0, 3] + pairs[8] * Value[1, 3] + pairs[11] * Value[2, 3] ) - ( pairs[4] * Value[0, 3] + pairs[9] * Value[1, 3] + pairs[10] * Value[2, 3] );
result.Value[3, 0] = ( pairs[2] * Value[2, 2] + pairs[5] * Value[3, 2] + pairs[1] * Value[1, 2] ) - ( pairs[4] * Value[3, 2] + pairs[0] * Value[1, 2] + pairs[3] * Value[2, 2] );
result.Value[3, 1] = ( pairs[8] * Value[3, 2] + pairs[0] * Value[0, 2] + pairs[7] * Value[2, 2] ) - ( pairs[6] * Value[2, 2] + pairs[9] * Value[3, 2] + pairs[1] * Value[0, 2] );
result.Value[3, 2] = ( pairs[6] * Value[1, 2] + pairs[11] * Value[3, 2] + pairs[3] * Value[0, 2] ) - ( pairs[10] * Value[3, 2] + pairs[2] * Value[0, 2] + pairs[7] * Value[1, 2] );
result.Value[3, 3] = ( pairs[10] * Value[2, 2] + pairs[4] * Value[0, 2] + pairs[9] * Value[1, 2] ) - ( pairs[8] * Value[1, 2] + pairs[11] * Value[2, 2] + pairs[5] * Value[0, 2] );
var determinant = 1f / ( Value[0, 0] * result.Value[0, 0] + Value[1, 0] * result.Value[0, 1] + Value[2, 0] * result.Value[0, 2] + Value[3, 0] * result.Value[0, 3] );
[Unroll]
for( var i = 0; i < 4; ++i ) {
result.Value[i, 0] *= determinant;
result.Value[i, 1] *= determinant;
result.Value[i, 2] *= determinant;
result.Value[i, 3] *= determinant;
}
return result;
}
}
}
[ForceInline]
public Matrix4x4& Translate( float x, float y, float z ) {
Value[3, 0] += Value[0, 0] * x + Value[1, 0] * y + Value[2, 0] * z;
Value[3, 1] += Value[0, 1] * x + Value[1, 1] * y + Value[2, 1] * z;
Value[3, 2] += Value[0, 2] * x + Value[1, 2] * y + Value[2, 2] * z;
Value[3, 3] += Value[0, 3] * x + Value[1, 3] * y + Value[2, 3] * z;
return this;
}
[ForceInline]
public Matrix4x4& Translate( Vector3& vector ) {
return Translate( vector.X, vector.Y, vector.Z );
}
public static bool operator ==( Matrix4x4& left, Matrix4x4& right ) asm {
X86_32 {
feature( SSE1 ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - left
// edx - right
}
else {
mov ecx, [esp + 4] // left
mov edx, [esp + 8] // right
}
movaps xmm0, [ecx]
movaps xmm1, [ecx + 16]
movaps xmm2, [ecx + 32]
movaps xmm3, [ecx + 48]
cmpeqps xmm0, [edx]
cmpeqps xmm1, [edx + 16]
cmpeqps xmm2, [edx + 32]
cmpeqps xmm3, [edx + 48]
andps xmm0, xmm1
andps xmm2, xmm3
andps xmm0, xmm2
movmskps eax, xmm0
cmp eax, 15
setz al
retn
}
feature( AMD3DNow ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - left
// edx - right
}
else {
mov ecx, [esp + 4] // left
mov edx, [esp + 8] // right
}
movq mm0, [ecx]
movq mm1, [ecx + 8]
movq mm2, [ecx + 16]
movq mm3, [ecx + 24]
movq mm4, [ecx + 32]
movq mm5, [ecx + 40]
movq mm6, [ecx + 48]
movq mm7, [ecx + 56]
pfcmpeq mm0, [edx]
pfcmpeq mm1, [edx + 8]
pfcmpeq mm2, [edx + 16]
pfcmpeq mm3, [edx + 24]
pfcmpeq mm4, [edx + 32]
pfcmpeq mm5, [edx + 40]
pfcmpeq mm6, [edx + 48]
pfcmpeq mm7, [edx + 56]
pand mm0, mm1
pand mm2, mm3
pand mm4, mm5
pand mm6, mm7
pand mm0, mm2
pand mm4, mm6
pand mm0, mm4
packssdw mm0, mm0
movd ecx, mm0
femms
xor eax, eax
cmp ecx, -1
setz al
retn
}
}
default {
float* value1 = left.Value, value2 = right.Value;
[Unroll]
for( int i = 0; i < 16; ++i )
if( value1[i] != value2[i] )
return false;
return true;
}
}
private static uint _mnull = 0x80000001;
private static int _minus1 = -1;
[CallingConvention( Value = DefaultCallingConvention )]
public bool Equals( Matrix4x4& other, int tolerance ) asm {
X86_32 {
feature( SSSE3 ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - this
// edx - other
movss xmm5, [esp + 4] // tolerance
}
else {
mov ecx, [esp + 4] // this
mov edx, [esp + 8] // other
movss xmm5, [esp + 12] // tolerance
}
movss xmm6, [[_mnull]]
movss xmm7, [[_minus1]]
movaps xmm0, [ecx]
movaps xmm1, [edx]
pshufd xmm5, xmm5, 0b0000000
pshufd xmm6, xmm6, 0b0000000
pshufd xmm7, xmm7, 0b0000000
movaps xmm2, xmm0
movaps xmm3, xmm1
movaps xmm4, xmm6
pcmpgtd xmm2, xmm7
pcmpgtd xmm3, xmm7
pxor xmm2, xmm3
pand xmm4, xmm2
pxor xmm0, xmm2
paddd xmm0, xmm4
psubd xmm0, xmm1
pabsd xmm0, xmm0
pcmpgtd xmm0, xmm5
// movmskps performs better
// ptest xmm0, xmm0
// jnz not_equal
movmskps eax, xmm0
test eax, eax
jnz NotEqual
//
movaps xmm0, [ecx + 16]
movaps xmm1, [edx + 16]
movaps xmm2, xmm0
movaps xmm3, xmm1
movaps xmm4, xmm6
pcmpgtd xmm2, xmm7
pcmpgtd xmm3, xmm7
pxor xmm2, xmm3
pand xmm4, xmm2
pxor xmm0, xmm2
paddd xmm0, xmm4
psubd xmm0, xmm1
pabsd xmm0, xmm0
pcmpgtd xmm0, xmm5
movmskps eax, xmm0
test eax, eax
jnz NotEqual
//
movaps xmm0, [ecx + 32]
movaps xmm1, [edx + 32]
movaps xmm2, xmm0
movaps xmm3, xmm1
movaps xmm4, xmm6
pcmpgtd xmm2, xmm7
pcmpgtd xmm3, xmm7
pxor xmm2, xmm3
pand xmm4, xmm2
pxor xmm0, xmm2
paddd xmm0, xmm4
psubd xmm0, xmm1
pabsd xmm0, xmm0
pcmpgtd xmm0, xmm5
movmskps eax, xmm0
test eax, eax
jnz NotEqual
//
movaps xmm0, [ecx + 48]
movaps xmm1, [edx + 48]
movaps xmm2, xmm0
movaps xmm3, xmm1
movaps xmm4, xmm6
pcmpgtd xmm2, xmm7
pcmpgtd xmm3, xmm7
pxor xmm2, xmm3
pand xmm4, xmm2
pxor xmm0, xmm2
paddd xmm0, xmm4
psubd xmm0, xmm1
pabsd xmm0, xmm0
pcmpgtd xmm0, xmm5
movmskps eax, xmm0
test eax, eax
jnz NotEqual
setz al
retn
NotEqual:
xor eax, eax
retn
}
}
default {
float* value1 = Value, value2 = other.Value;
[Unroll]
for( int i = 0; i < 16; ++i )
if( !float.AreEqual( value1[i], value2[i], tolerance ) )
return false;
return true;
}
}
public static bool operator !=( Matrix4x4& left, Matrix4x4& right ) {
return !( left == right );
}
public float Determinant {
[CallingConvention( Value = DefaultCallingConvention )]
get asm {
X86_32 {
feature( SSE1 ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - this
}
else {
mov ecx, [esp + 4] // this
}
movaps xmm0, [ecx + 32]
movaps xmm1, [ecx + 48]
movaps xmm2, [ecx + 16]
movaps xmm3, xmm0
movaps xmm4, xmm0
movaps xmm6, xmm1
movaps xmm7, xmm2
shufps xmm0, xmm0, 0b00011011
shufps xmm1, xmm1, 0b10110001
shufps xmm2, xmm2, 0b01001110
shufps xmm7, xmm7, 0b00111001
mulps xmm0, xmm1
shufps xmm3, xmm3, 0b01111101
shufps xmm6, xmm6, 0b00001010
movaps xmm5, xmm0
shufps xmm0, xmm0, 0b01001110
shufps xmm4, xmm4, 0b00001010
shufps xmm1, xmm1, 0b00101000
subps xmm5, xmm0
mulps xmm3, xmm6
mulps xmm4, xmm1
mulps xmm5, xmm2
shufps xmm2, xmm2, 0b00111001
subps xmm3, xmm4
movaps xmm0, xmm3
shufps xmm0, xmm0, 0b00111001
mulps xmm3, xmm2
mulps xmm0, xmm7
addps xmm5, xmm3
subps xmm5, xmm0
mulps xmm5, [ecx]
movhlps xmm7, xmm5
addps xmm5, xmm7
movaps xmm6, xmm5
shufps xmm6, xmm6, 0b00000001
addss xmm5, xmm6
mov eax, [esp]
movss [esp], xmm5
fld dword ptr [esp]
pop ecx
jmp eax
}
feature( AMD3DNow ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - this
}
else {
mov ecx, [esp + 4] // this
}
movq mm2, qword ptr [ecx + 0x20]
movq mm1, qword ptr [ecx + 0x28]
movq mm0, qword ptr [ecx + 0x24]
punpckhdq mm1, mm1
punpckldq mm2, mm2
movq mm5, qword ptr [ecx + 0x30]
movq mm4, qword ptr [ecx + 0x38]
movq mm3, qword ptr [ecx + 0x34]
punpckhdq mm4, mm4
punpckldq mm5, mm5
pfmul mm4, mm0
pfmul mm1, mm3
pfsub mm4, mm1
pfmul mm2, qword ptr [ecx + 0x38]
pfmul mm5, qword ptr [ecx + 0x28]
pfsub mm2, mm5
pfmul mm3, qword ptr [ecx + 0x20]
pfmul mm0, qword ptr [ecx + 0x30]
pfsub mm3, mm0
movq mm0, mm2
punpckhdq mm0, mm0
punpckldq mm0, mm4
movq mm1, mm3
punpckhdq mm1, mm4
movq mm5, mm3
punpckldq mm5, mm2
movq mm6, qword ptr [ecx + 0x18]
movq mm7, mm6
punpckhdq mm6, mm6
pfmul mm4, qword ptr [ecx + 0x10]
pfmul mm3, mm6
pfadd mm4, mm3
movq mm3, qword ptr [ecx + 0x14]
pfmul mm0, mm3
pfsub mm4, mm0
movq mm0, qword ptr [ecx + 0x10]
punpckldq mm0, mm0
movq mm6, qword ptr [ecx + 8]
punpckldq mm6, dword ptr [ecx]
pfmul mm4, mm6
movq mm6, qword ptr [ecx + 8]
punpckhdq mm6, qword ptr [ecx]
pfmul mm5, mm7
pfmul mm2, mm3
pfmul mm1, mm0
pfadd mm1, mm5
pfsub mm1, mm2
pfmul mm1, mm6
pfsub mm4, mm1
pfacc mm4, mm4
mov eax, [esp]
movd dword ptr [esp], mm4
femms
fld dword ptr [esp]
pop ecx
jmp eax
}
}
default {
float[12] pairs;
pairs[0] = Value[2, 2] * Value[3, 3];
pairs[1] = Value[3, 2] * Value[2, 3];
pairs[2] = Value[1, 2] * Value[3, 3];
pairs[3] = Value[3, 2] * Value[1, 3];
pairs[4] = Value[1, 2] * Value[2, 3];
pairs[5] = Value[2, 2] * Value[1, 3];
pairs[6] = Value[0, 2] * Value[3, 3];
pairs[7] = Value[3, 2] * Value[0, 3];
pairs[8] = Value[0, 2] * Value[2, 3];
pairs[9] = Value[2, 2] * Value[0, 3];
pairs[10] = Value[0, 2] * Value[1, 3];
pairs[11] = Value[1, 2] * Value[0, 3];
var x = pairs[0] * Value[1, 1] + pairs[3] * Value[2, 1] + pairs[4] * Value[3, 1] - pairs[1] * Value[1, 1] - pairs[2] * Value[2, 1] - pairs[5] * Value[3, 1];
var y = pairs[1] * Value[0, 1] + pairs[6] * Value[2, 1] + pairs[9] * Value[3, 1] - pairs[0] * Value[0, 1] - pairs[7] * Value[2, 1] - pairs[8] * Value[3, 1];
var z = pairs[2] * Value[0, 1] + pairs[7] * Value[1, 1] + pairs[10] * Value[3, 1] - pairs[3] * Value[0, 1] - pairs[6] * Value[2, 1] - pairs[11] * Value[3, 1];
var w = pairs[5] * Value[0, 1] + pairs[8] * Value[1, 1] + pairs[11] * Value[2, 1] - pairs[4] * Value[0, 1] - pairs[9] * Value[1, 1] - pairs[10] * Value[2, 1];
return Value[0, 0] * x + Value[1, 0] * y + Value[2, 0] * z + Value[3, 0] * w;
}
}
}
public Matrix4x4& Scale( float x, float y, float z ) {
Value[0, 0] *= x;
Value[0, 1] *= x;
Value[0, 2] *= x;
Value[0, 3] *= x;
Value[1, 0] *= y;
Value[1, 1] *= y;
Value[1, 2] *= y;
Value[1, 3] *= y;
Value[2, 0] *= z;
Value[2, 1] *= z;
Value[2, 2] *= z;
Value[2, 3] *= z;
return this;
}
public Matrix4x4& RotateX( float angle ) {
float sin = Math.Sin( angle ), cos = Math.Cos( angle );
float[8] temp;
Memory.Copy( temp, &Value[1, 0], sizeof( temp ) );
Value[1, 0] = temp[0] * cos + temp[4] * sin;
Value[1, 1] = temp[1] * cos + temp[5] * sin;
Value[1, 2] = temp[2] * cos + temp[6] * sin;
Value[1, 3] = temp[3] * cos + temp[7] * sin;
Value[2, 0] = temp[4] * cos - temp[0] * sin;
Value[2, 1] = temp[5] * cos - temp[1] * sin;
Value[2, 2] = temp[6] * cos - temp[2] * sin;
Value[2, 3] = temp[7] * cos - temp[3] * sin;
return this;
}
public Matrix4x4& RotateY( float angle ) {
float sin = Math.Sin( angle ), cos = Math.Cos( angle );
float[4] temp1, temp2;
Memory.Copy( temp1, &Value[0, 0], sizeof( temp1 ) );
Memory.Copy( temp2, &Value[2, 0], sizeof( temp2 ) );
Value[0, 0] = temp1[0] * cos - temp2[0] * sin;
Value[0, 1] = temp1[1] * cos - temp2[1] * sin;
Value[0, 2] = temp1[2] * cos - temp2[2] * sin;
Value[0, 3] = temp1[3] * cos - temp2[3] * sin;
Value[2, 0] = temp1[0] * sin + temp2[0] * cos;
Value[2, 1] = temp1[1] * sin + temp2[1] * cos;
Value[2, 2] = temp1[2] * sin + temp2[2] * cos;
Value[2, 3] = temp1[3] * sin + temp2[3] * cos;
return this;
}
public Matrix4x4& RotateZ( float angle ) {
float sin = Math.Sin( angle ), cos = Math.Cos( angle );
float[8] temp;
Memory.Copy( temp, Value, sizeof( temp ) );
Value[0, 0] = temp[0] * cos + temp[4] * sin;
Value[0, 1] = temp[1] * cos + temp[5] * sin;
Value[0, 2] = temp[2] * cos + temp[6] * sin;
Value[0, 3] = temp[3] * cos + temp[7] * sin;
Value[1, 0] = temp[4] * cos - temp[0] * sin;
Value[1, 1] = temp[5] * cos - temp[1] * sin;
Value[1, 2] = temp[6] * cos - temp[2] * sin;
Value[1, 3] = temp[7] * cos - temp[3] * sin;
return this;
}
public Matrix4x4& Rotate( float x, float y, float z ) {
float A = Math.Cos( x ), B = Math.Sin( x ), C = Math.Cos( y ), D = Math.Sin( y ), E = Math.Cos( z ), F = Math.Sin( z );
float AC = A * C, AE = A * E, AF = A * F;
float BC = B * C, BE = B * E, BF = B * F;
float CE = C * E, CF = C * F;
float DE = D * E, DF = D * F;
float ADE = A * DE, ADF = A * DF;
float BDE = B * DE, BDF = B * DF;
float BF_ADE = BF - ADE;
float BE_ADF = BE + ADF;
float AF_BDE = AF + BDE;
float AE_BDF = AE - BDF;
float[12] temp;
temp[0] = CE * Value[0, 0] + AF_BDE * Value[1, 0] + BF_ADE * Value[2, 0];
temp[1] = CE * Value[0, 1] + AF_BDE * Value[1, 1] + BF_ADE * Value[2, 1];
temp[2] = CE * Value[0, 2] + AF_BDE * Value[1, 2] + BF_ADE * Value[2, 2];
temp[3] = CE * Value[0, 3] + AF_BDE * Value[1, 3] + BF_ADE * Value[2, 3];
temp[4] = -CF * Value[0, 0] + AE_BDF * Value[1, 0] + BE_ADF * Value[2, 0];
temp[5] = -CF * Value[0, 1] + AE_BDF * Value[1, 1] + BE_ADF * Value[2, 1];
temp[6] = -CF * Value[0, 2] + AE_BDF * Value[1, 2] + BE_ADF * Value[2, 2];
temp[7] = -CF * Value[0, 3] + AE_BDF * Value[1, 3] + BE_ADF * Value[2, 3];
temp[8] = D * Value[0, 0] - BC * Value[1, 0] + AC * Value[2, 0];
temp[9] = D * Value[0, 1] - BC * Value[1, 1] + AC * Value[2, 1];
temp[10] = D * Value[0, 2] - BC * Value[1, 2] + AC * Value[2, 2];
temp[11] = D * Value[0, 3] - BC * Value[1, 3] + AC * Value[2, 3];
Memory.Copy( Value, temp, sizeof( temp ) );
return this;
}
[CallingConvention( Value = DefaultCallingConvention )]
public static Matrix4x4 operator *( Matrix4x4& leftMatrix, Matrix4x4& rightMatrix ) asm {
X86_32 {
feature( SSE2 ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - leftMatrix
// edx - rightMatrix
mov eax, [esp + 4] // result
}
else {
mov ecx, [esp + 4] // leftMatrix
mov edx, [esp + 8] // rightMatrix
mov eax, [esp + 12] // result
}
prefetchnta [ecx]
movaps xmm7, [edx]
movaps xmm0, [ecx]
movaps xmm1, [ecx + 16]
movaps xmm2, [ecx + 32]
movaps xmm3, [ecx + 48]
pshufd xmm4, xmm7, 0b00000000
pshufd xmm5, xmm7, 0b01010101
pshufd xmm6, xmm7, 0b10101010
pshufd xmm7, xmm7, 0b11111111
mulps xmm4, xmm0
mulps xmm5, xmm1
mulps xmm6, xmm2
mulps xmm7, xmm3
addps xmm4, xmm5
addps xmm6, xmm7
addps xmm4, xmm6
movaps xmm7, [edx + 16]
movaps [eax], xmm4
pshufd xmm4, xmm7, 0b00000000
pshufd xmm5, xmm7, 0b01010101
pshufd xmm6, xmm7, 0b10101010
pshufd xmm7, xmm7, 0b11111111
mulps xmm4, xmm0
mulps xmm5, xmm1
mulps xmm6, xmm2
mulps xmm7, xmm3
addps xmm4, xmm5
addps xmm6, xmm7
addps xmm4, xmm6
movaps xmm7, [edx + 32]
movaps [eax + 16], xmm4
pshufd xmm4, xmm7, 0b00000000
pshufd xmm5, xmm7, 0b01010101
pshufd xmm6, xmm7, 0b10101010
pshufd xmm7, xmm7, 0b11111111
mulps xmm4, xmm0
mulps xmm5, xmm1
mulps xmm6, xmm2
mulps xmm7, xmm3
addps xmm4, xmm5
addps xmm6, xmm7
addps xmm4, xmm6
movaps xmm7, [edx + 48]
movaps [eax + 32], xmm4
pshufd xmm4, xmm7, 0b00000000
pshufd xmm5, xmm7, 0b01010101
pshufd xmm6, xmm7, 0b10101010
pshufd xmm7, xmm7, 0b11111111
mulps xmm4, xmm0
mulps xmm5, xmm1
mulps xmm6, xmm2
mulps xmm7, xmm3
addps xmm4, xmm5
addps xmm6, xmm7
addps xmm4, xmm6
movaps [eax + 48], xmm4
retn
}
feature( SSE1 ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - leftMatrix
// edx - rightMatrix
mov eax, [esp + 4] // result
}
else {
mov ecx, [esp + 4] // leftMatrix
mov edx, [esp + 8] // rightMatrix
mov eax, [esp + 12] // result
}
prefetchnta [ecx]
movaps xmm0, [ecx]
movaps xmm1, [ecx + 16]
movaps xmm2, [ecx + 32]
movaps xmm3, [ecx + 48]
prefetchnta [edx]
movaps xmm4, [edx]
movaps xmm5, xmm4
movaps xmm6, xmm4
movaps xmm7, xmm4
shufps xmm4, xmm4, 0b00000000
shufps xmm5, xmm5, 0b01010101
shufps xmm6, xmm6, 0b10101010
shufps xmm7, xmm7, 0b11111111
mulps xmm4, xmm0
mulps xmm5, xmm1
mulps xmm6, xmm2
mulps xmm7, xmm3
addps xmm4, xmm5
addps xmm6, xmm7
addps xmm4, xmm6
movaps xmm5, [edx + 16]
movaps [eax], xmm4
movaps xmm4, xmm5
movaps xmm6, xmm5
movaps xmm7, xmm5
shufps xmm4, xmm4, 0b00000000
shufps xmm5, xmm5, 0b01010101
shufps xmm6, xmm6, 0b10101010
shufps xmm7, xmm7, 0b11111111
mulps xmm4, xmm0
mulps xmm5, xmm1
mulps xmm6, xmm2
mulps xmm7, xmm3
addps xmm4, xmm5
addps xmm6, xmm7
addps xmm4, xmm6
movaps xmm5, [edx + 32]
movaps [eax + 16], xmm4
movaps xmm4, xmm5
movaps xmm6, xmm5
movaps xmm7, xmm5
shufps xmm4, xmm4, 0b00000000
shufps xmm5, xmm5, 0b01010101
shufps xmm6, xmm6, 0b10101010
shufps xmm7, xmm7, 0b11111111
mulps xmm4, xmm0
mulps xmm5, xmm1
mulps xmm6, xmm2
mulps xmm7, xmm3
addps xmm4, xmm5
addps xmm6, xmm7
addps xmm4, xmm6
movaps xmm5, [edx + 48]
movaps [eax + 32], xmm4
movaps xmm4, xmm5
movaps xmm6, xmm5
movaps xmm7, xmm5
shufps xmm4, xmm4, 0b00000000
shufps xmm5, xmm5, 0b01010101
shufps xmm6, xmm6, 0b10101010
shufps xmm7, xmm7, 0b11111111
mulps xmm4, xmm0
mulps xmm5, xmm1
mulps xmm6, xmm2
mulps xmm7, xmm3
addps xmm4, xmm5
addps xmm6, xmm7
addps xmm4, xmm6
movaps [eax + 48], xmm4
retn
}
feature( AMD3DNow ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - leftMatrix
// edx - rightMatrix
mov eax, [esp + 4] // result
}
else {
mov ecx, [esp + 4] // leftMatrix
mov edx, [esp + 8] // rightMatrix
mov eax, [esp + 12] // result
}
cmp eax, edx
jz RightAndResultSame
push esp
RightAndResultNotSame:
movq mm0, [ecx]
movq mm1, [ecx + 16]
movq mm2, [ecx + 32]
movq mm3, [ecx + 48]
pshufw mm4, [edx], 0b01000100
pshufw mm5, [edx], 0b11101110
pshufw mm6, [edx + 8], 0b01000100
pshufw mm7, [edx + 8], 0b11101110
pfmul mm4, mm0
pfmul mm5, mm1
pfadd mm4, mm5
pfmul mm6, mm2
pfadd mm4, mm6
pfmul mm7, mm3
pfadd mm4, mm7
movq [eax], mm4
pshufw mm4, [edx + 16], 0b01000100
pshufw mm5, [edx + 16], 0b11101110
pshufw mm6, [edx + 24], 0b01000100
pshufw mm7, [edx + 24], 0b11101110
pfmul mm4, mm0
pfmul mm5, mm1
pfadd mm4, mm5
pfmul mm6, mm2
pfadd mm4, mm6
pfmul mm7, mm3
pfadd mm4, mm7
movq [eax + 16], mm4
pshufw mm4, [edx + 32], 0b01000100
pshufw mm5, [edx + 32], 0b11101110
pshufw mm6, [edx + 40], 0b01000100
pshufw mm7, [edx + 40], 0b11101110
pfmul mm4, mm0
pfmul mm5, mm1
pfadd mm4, mm5
pfmul mm6, mm2
pfadd mm4, mm6
pfmul mm7, mm3
pfadd mm4, mm7
movq [eax + 32], mm4
pshufw mm4, [edx + 48], 0b01000100
pshufw mm5, [edx + 48], 0b11101110
pshufw mm6, [edx + 56], 0b01000100
pshufw mm7, [edx + 56], 0b11101110
pfmul mm4, mm0
pfmul mm5, mm1
pfadd mm4, mm5
pfmul mm6, mm2
pfadd mm4, mm6
pfmul mm7, mm3
pfadd mm4, mm7
movq [eax + 48], mm4
movq mm0, [ecx + 8]
movq mm1, [ecx + 24]
movq mm2, [ecx + 40]
movq mm3, [ecx + 56]
pshufw mm4, [edx], 0b01000100
pshufw mm5, [edx], 0b11101110
pshufw mm6, [edx + 8], 0b01000100
pshufw mm7, [edx + 8], 0b11101110
pfmul mm4, mm0
pfmul mm5, mm1
pfadd mm4, mm5
pfmul mm6, mm2
pfadd mm4, mm6
pfmul mm7, mm3
pfadd mm4, mm7
movq [eax + 8], mm4
pshufw mm4, [edx + 16], 0b01000100
pshufw mm5, [edx + 16], 0b11101110
pshufw mm6, [edx + 24], 0b01000100
pshufw mm7, [edx + 24], 0b11101110
pfmul mm4, mm0
pfmul mm5, mm1
pfadd mm4, mm5
pfmul mm6, mm2
pfadd mm4, mm6
pfmul mm7, mm3
pfadd mm4, mm7
movq [eax + 24], mm4
pshufw mm4, [edx + 32], 0b01000100
pshufw mm5, [edx + 32], 0b11101110
pshufw mm6, [edx + 40], 0b01000100
pshufw mm7, [edx + 40], 0b11101110
pfmul mm4, mm0
pfmul mm5, mm1
pfadd mm4, mm5
pfmul mm6, mm2
pfadd mm4, mm6
pfmul mm7, mm3
pfadd mm4, mm7
movq [eax + 40], mm4
pshufw mm4, [edx + 48], 0b01000100
pshufw mm5, [edx + 48], 0b11101110
pshufw mm6, [edx + 56], 0b01000100
pshufw mm7, [edx + 56], 0b11101110
pfmul mm4, mm0
pfmul mm5, mm1
pfadd mm4, mm5
pfmul mm6, mm2
pfadd mm4, mm6
pfmul mm7, mm3
pfadd mm4, mm7
movq [eax + 56], mm4
femms
pop esp
retn
RightAndResultSame:
mov edx, esp
and esp, 0xFFFFFFF0
sub esp, 64
push edx
lea edx, [esp + 4]
movq mm0, [eax]
movq mm1, [eax + 8]
movq mm2, [eax + 16]
movq mm3, [eax + 24]
movq mm4, [eax + 32]
movq mm5, [eax + 40]
movq mm6, [eax + 48]
movq mm6, [eax + 56]
movq [edx], mm0
movq [edx + 8], mm1
movq [edx + 16], mm2
movq [edx + 24], mm3
movq [edx + 32], mm4
movq [edx + 40], mm5
movq [edx + 48], mm6
movq [edx + 56], mm7
jmp RightAndResultNotSame
}
}
default {
Matrix4x4 resultMatrix;
var& left = leftMatrix.Value;
var& right = rightMatrix.Value;
var& result = resultMatrix.Value;
result[0, 0] = left[0, 0] * right[0, 0] + left[1, 0] * right[0, 1] + left[2, 0] * right[0, 2] + left[3, 0] * right[0, 3];
result[0, 1] = left[0, 1] * right[0, 0] + left[1, 1] * right[0, 1] + left[2, 1] * right[0, 2] + left[3, 1] * right[0, 3];
result[0, 2] = left[0, 2] * right[0, 0] + left[1, 2] * right[0, 1] + left[2, 2] * right[0, 2] + left[3, 2] * right[0, 3];
result[0, 3] = left[0, 3] * right[0, 0] + left[1, 3] * right[0, 1] + left[2, 3] * right[0, 2] + left[3, 3] * right[0, 3];
result[1, 0] = left[0, 0] * right[1, 0] + left[1, 0] * right[1, 1] + left[2, 0] * right[1, 2] + left[3, 0] * right[1, 3];
result[1, 1] = left[0, 1] * right[1, 0] + left[1, 1] * right[1, 1] + left[2, 1] * right[1, 2] + left[3, 1] * right[1, 3];
result[1, 2] = left[0, 2] * right[1, 0] + left[1, 2] * right[1, 1] + left[2, 2] * right[1, 2] + left[3, 2] * right[1, 3];
result[1, 3] = left[0, 3] * right[1, 0] + left[1, 3] * right[1, 1] + left[2, 3] * right[1, 2] + left[3, 3] * right[1, 3];
result[2, 0] = left[0, 0] * right[2, 0] + left[1, 0] * right[2, 1] + left[2, 0] * right[2, 2] + left[3, 0] * right[2, 3];
result[2, 1] = left[0, 1] * right[2, 0] + left[1, 1] * right[2, 1] + left[2, 1] * right[2, 2] + left[3, 1] * right[2, 3];
result[2, 2] = left[0, 2] * right[2, 0] + left[1, 2] * right[2, 1] + left[2, 2] * right[2, 2] + left[3, 2] * right[2, 3];
result[2, 3] = left[0, 3] * right[2, 0] + left[1, 3] * right[2, 1] + left[2, 3] * right[2, 2] + left[3, 3] * right[2, 3];
result[3, 0] = left[0, 0] * right[3, 0] + left[1, 0] * right[3, 1] + left[2, 0] * right[3, 2] + left[3, 0] * right[3, 3];
result[3, 1] = left[0, 1] * right[3, 0] + left[1, 1] * right[3, 1] + left[2, 1] * right[3, 2] + left[3, 1] * right[3, 3];
result[3, 2] = left[0, 2] * right[3, 0] + left[1, 2] * right[3, 1] + left[2, 2] * right[3, 2] + left[3, 2] * right[3, 3];
result[3, 3] = left[0, 3] * right[3, 0] + left[1, 3] * right[3, 1] + left[2, 3] * right[3, 2] + left[3, 3] * right[3, 3];
return resultMatrix;
}
}
[CallingConvention( Value = DefaultCallingConvention )]
public static Matrix4x4 operator -( Matrix4x4& leftMatrix, Matrix4x4& rightMatrix ) asm {
X86_32 {
feature( SSE1 ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - leftMatrix
// edx - rightMatrix
mov eax, [esp + 4] // result
}
else {
mov ecx, [esp + 4] // leftMatrix
mov edx, [esp + 8] // rightMatrix
mov eax, [esp + 12] // result
}
movaps xmm0, [ecx]
movaps xmm1, [ecx + 16]
movaps xmm2, [ecx + 32]
movaps xmm3, [ecx + 48]
subps xmm0, [edx]
subps xmm1, [edx + 16]
subps xmm2, [edx + 32]
subps xmm3, [edx + 48]
movaps [eax], xmm0
movaps [eax + 16], xmm1
movaps [eax + 32], xmm2
movaps [eax + 48], xmm3
retn
}
feature( AMD3DNow ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - leftMatrix
// edx - rightMatrix
mov eax, [esp + 4] // result
}
else {
mov ecx, [esp + 4] // leftMatrix
mov edx, [esp + 8] // rightMatrix
mov eax, [esp + 12] // result
}
movq mm0, [ecx]
movq mm1, [ecx + 8]
movq mm2, [ecx + 16]
movq mm3, [ecx + 24]
movq mm4, [ecx + 32]
movq mm5, [ecx + 40]
movq mm6, [ecx + 48]
movq mm7, [ecx + 56]
pfsub mm0, [edx]
pfsub mm1, [edx + 8]
pfsub mm2, [edx + 16]
pfsub mm3, [edx + 24]
pfsub mm4, [edx + 32]
pfsub mm5, [edx + 40]
pfsub mm6, [edx + 48]
pfsub mm7, [edx + 56]
movq [eax], mm0
movq [eax + 8], mm1
movq [eax + 16], mm2
movq [eax + 24], mm3
movq [eax + 32], mm4
movq [eax + 40], mm5
movq [eax + 48], mm6
movq [eax + 56], mm7
femms
retn
}
default {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - leftMatrix
// edx - rightMatrix
mov eax, [esp + 4] // result
}
else {
mov ecx, [esp + 4] // leftMatrix
mov edx, [esp + 8] // rightMatrix
mov eax, [esp + 12] // result
}
fld dword ptr [ecx]
fsub dword ptr [edx]
fld dword ptr [ecx + 4]
fsub dword ptr [edx + 4]
fld dword ptr [ecx + 8]
fsub dword ptr [edx + 8]
fld dword ptr [ecx + 12]
fsub dword ptr [edx + 12]
fld dword ptr [ecx + 16]
fsub dword ptr [edx + 16]
fld dword ptr [ecx + 20]
fsub dword ptr [edx + 20]
fld dword ptr [ecx + 24]
fsub dword ptr [edx + 24]
fld dword ptr [ecx + 28]
fsub dword ptr [edx + 28]
fstp dword ptr [eax + 28]
fstp dword ptr [eax + 24]
fstp dword ptr [eax + 20]
fstp dword ptr [eax + 16]
fstp dword ptr [eax + 12]
fstp dword ptr [eax + 8]
fstp dword ptr [eax + 4]
fstp dword ptr [eax]
fld dword ptr [ecx + 32]
fsub dword ptr [edx + 32]
fld dword ptr [ecx + 36]
fsub dword ptr [edx + 36]
fld dword ptr [ecx + 40]
fsub dword ptr [edx + 40]
fld dword ptr [ecx + 44]
fsub dword ptr [edx + 44]
fld dword ptr [ecx + 48]
fsub dword ptr [edx + 48]
fld dword ptr [ecx + 52]
fsub dword ptr [edx + 52]
fld dword ptr [ecx + 56]
fsub dword ptr [edx + 56]
fld dword ptr [ecx + 60]
fsub dword ptr [edx + 60]
fstp dword ptr [eax + 60]
fstp dword ptr [eax + 56]
fstp dword ptr [eax + 52]
fstp dword ptr [eax + 48]
fstp dword ptr [eax + 44]
fstp dword ptr [eax + 40]
fstp dword ptr [eax + 36]
fstp dword ptr [eax + 32]
retn
}
}
default {
Matrix4x4 resultMatrix;
var& left = leftMatrix.Value;
var& right = rightMatrix.Value;
var& result = resultMatrix.Value;
for( int i = 0; i < 4; ++i )
for( int j = 0; j < 4; ++j )
result[i, j] = left[i, j] - right[i, j];
return resultMatrix;
}
}
[CallingConvention( Value = DefaultCallingConvention )]
public static Matrix4x4 operator +( Matrix4x4& leftMatrix, Matrix4x4& rightMatrix ) asm {
X86_32 {
feature( SSE1 ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - leftMatrix
// edx - rightMatrix
mov eax, [esp + 4] // result
}
else {
mov ecx, [esp + 4] // leftMatrix
mov edx, [esp + 8] // rightMatrix
mov eax, [esp + 12] // result
}
movaps xmm0, [ecx]
movaps xmm1, [ecx + 16]
movaps xmm2, [ecx + 32]
movaps xmm3, [ecx + 48]
addps xmm0, [edx]
addps xmm1, [edx + 16]
addps xmm2, [edx + 32]
addps xmm3, [edx + 48]
movaps [eax], xmm0
movaps [eax + 16], xmm1
movaps [eax + 32], xmm2
movaps [eax + 48], xmm3
retn
}
feature( AMD3DNow ) {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - leftMatrix
// edx - rightMatrix
mov eax, [esp + 4] // result
}
else {
mov ecx, [esp + 4] // leftMatrix
mov edx, [esp + 8] // rightMatrix
mov eax, [esp + 12] // result
}
movq mm0, [ecx]
movq mm1, [ecx + 8]
movq mm2, [ecx + 16]
movq mm3, [ecx + 24]
movq mm4, [ecx + 32]
movq mm5, [ecx + 40]
movq mm6, [ecx + 48]
movq mm7, [ecx + 56]
pfadd mm0, [edx]
pfadd mm1, [edx + 8]
pfadd mm2, [edx + 16]
pfadd mm3, [edx + 24]
pfadd mm4, [edx + 32]
pfadd mm5, [edx + 40]
pfadd mm6, [edx + 48]
pfadd mm7, [edx + 56]
movq [eax], mm0
movq [eax + 8], mm1
movq [eax + 16], mm2
movq [eax + 24], mm3
movq [eax + 32], mm4
movq [eax + 40], mm5
movq [eax + 48], mm6
movq [eax + 56], mm7
femms
retn
}
default {
if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
// ecx - leftMatrix
// edx - rightMatrix
mov eax, [esp + 4] // result
}
else {
mov ecx, [esp + 4] // leftMatrix
mov edx, [esp + 8] // rightMatrix
mov eax, [esp + 12] // result
}
fld dword ptr [ecx]
fadd dword ptr [edx]
fld dword ptr [ecx + 4]
fadd dword ptr [edx + 4]
fld dword ptr [ecx + 8]
fadd dword ptr [edx + 8]
fld dword ptr [ecx + 12]
fadd dword ptr [edx + 12]
fld dword ptr [ecx + 16]
fadd dword ptr [edx + 16]
fld dword ptr [ecx + 20]
fadd dword ptr [edx + 20]
fld dword ptr [ecx + 24]
fadd dword ptr [edx + 24]
fld dword ptr [ecx + 28]
fadd dword ptr [edx + 28]
fstp dword ptr [eax + 28]
fstp dword ptr [eax + 24]
fstp dword ptr [eax + 20]
fstp dword ptr [eax + 16]
fstp dword ptr [eax + 12]
fstp dword ptr [eax + 8]
fstp dword ptr [eax + 4]
fstp dword ptr [eax]
fld dword ptr [ecx + 32]
fadd dword ptr [edx + 32]
fld dword ptr [ecx + 36]
fadd dword ptr [edx + 36]
fld dword ptr [ecx + 40]
fadd dword ptr [edx + 40]
fld dword ptr [ecx + 44]
fadd dword ptr [edx + 44]
fld dword ptr [ecx + 48]
fadd dword ptr [edx + 48]
fld dword ptr [ecx + 52]
fadd dword ptr [edx + 52]
fld dword ptr [ecx + 56]
fadd dword ptr [edx + 56]
fld dword ptr [ecx + 60]
fadd dword ptr [edx + 60]
fstp dword ptr [eax + 60]
fstp dword ptr [eax + 56]
fstp dword ptr [eax + 52]
fstp dword ptr [eax + 48]
fstp dword ptr [eax + 44]
fstp dword ptr [eax + 40]
fstp dword ptr [eax + 36]
fstp dword ptr [eax + 32]
retn
}
}
default {
Matrix4x4 resultMatrix;
var& left = leftMatrix.Value;
var& right = rightMatrix.Value;
var& result = resultMatrix.Value;
for( int i = 0; i < 4; ++i )
for( int j = 0; j < 4; ++j )
result[i, j] = left[i, j] + right[i, j];
return resultMatrix;
}
}
[UnitTest]
private static void UnitTest() {
var mat1 = new Matrix4x4( new float[16] {
1f, 2, 3, 0,
4f, 5, 6, 0,
0f, 8, 7, 0,
2f, 3, 0, 1
} );
var equal1 = new Matrix4x4( new float[16] {
bitcast<float>( bitcast<uint>( 1f ) + 1 ), 2, 3, 0,
bitcast<float>( bitcast<uint>( 4f ) + 1 ), 5, 6, 0,
bitcast<float>( bitcast<uint>( 0f ) + 1 ), 8, 7, 0,
bitcast<float>( bitcast<uint>( 2f ) + 1 ), 3, 0, 1
} );
var mat2 = new Matrix4x4( new float[16] {
-0.48148149f, 0.37037039f, -0.11111111f, 0f,
-1.037037f, 0.25925925f, 0.22222222f, 0f,
1.1851852f, -0.2962963f, -0.11111111f, 0f,
4.0740743f, -1.5185186f, -0.44444445f, 1f
} );
var mat3 = new Matrix4x4( new float[16] {
1, 4, 0, 2,
2, 5, 8, 3,
3, 6, 7, 0,
0, 0, 0, 1
} );
Assert.IsTrue( mat1.Equals( equal1, 2 ) );
Assert.IsTrue( Identity.Determinant == 1f );
Assert.IsTrue( mat1.Determinant == 27f );
Assert.IsTrue( mat1.Inversed == mat2 );
Assert.IsTrue( mat1.Transposed == mat3 );
var ozdrmat = new Matrix4x4( new float[16] {
-1, 0, 0, 0,
2, -1, 0, 0,
3, 2, 1, 0,
4, 3, 2, 1
} );
var ozdrVector = new Vector4( 2, 0, 1, 0 );
Assert.IsTrue( new Vector4( 5, 2, 1, 0 ) == ozdrmat.Inversed * ozdrVector );
}
}
}