//
// Created by Eugeny Grishul
//
// See license at http://bamelg.com/license.txt
//

using System.Runtime.InteropServices;

namespace System.Numerics {
	[Alignment( Boundary = 16 )]
	public struct Quaternion : IFormattable {
		public float X, Y, Z, W;

		public Quaternion() { }

		public Quaternion( float x, float y, float z, float w ) {
			X = x;
			Y = y;
			Z = z;
			W = w;
		}

		public static Quaternion operator +( Quaternion& left, Quaternion& right ) {
			return new Quaternion( left.X + right.X, left.Y + right.Y, left.Z + right.Z, left.W + right.W );
		}

		public static Quaternion operator -( Quaternion& left, Quaternion& right ) {
			return new Quaternion( left.X - right.X, left.Y - right.Y, left.Z - right.Z, left.W - right.W );
		}

		public static bool operator ==( Quaternion& left, Quaternion& right ) {
			return left.X == right.X & left.Y == right.Y & left.Z == right.Z & left.W == right.W;
		}

		public static bool operator !=( Quaternion& left, Quaternion& right ) {
			return left.X != right.X | left.Y != right.Y | left.Z != right.Z | left.W != right.W;
		}

		private static uint _mnull = 0x80000001;
		private static int _minus1 = -1;

		public bool Equals( Quaternion& other, int tolerance ) {
			return ( ( Vector4* ) &this )->Equals( *( Vector4* ) &other, tolerance );
		}

		[Alignment( Boundary = 16 )]
		private static uint[4] _changeWSignMask = new uint[4] { 0, 0, 0, 0x80000000 };

		[Alignment( Boundary = 8 )]
		private static uint[2] _changeSign = new uint[2] { 0x80000000, 0x00000000 };

		public static Quaternion operator *( Quaternion& left, Quaternion& right ) asm {
			X86_32 {
				feature( SSE1 ) {
					if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
						// ecx - left
						// edx - right
						mov eax, [esp + 4] // result
					}
					else {
						mov ecx, [esp + 4]  // left
						mov edx, [esp + 8]  // right
						mov eax, [esp + 12] // result
					}

					movaps xmm0, [ecx]
					movaps xmm1, [edx]
					movaps xmm2, xmm0
					movaps xmm3, xmm1
					movaps xmm4, xmm1
					shufps xmm0, xmm0, 0b11111111
					shufps xmm1, xmm1, 0b00111111
					shufps xmm2, xmm2, 0b00100100
					mulps xmm0, xmm3
					mulps xmm1, xmm2
					shufps xmm3, xmm3, 0b01010010
					shufps xmm2, xmm2, 0b01001001
					shufps xmm4, xmm4, 0b10001001
					mulps xmm3, xmm2
					shufps xmm2, xmm2, 0b01001001
					addps xmm1, xmm3
					mulps xmm2, xmm4
					xorps xmm1, [[_changeWSignMask]]
					subps xmm0, xmm2
					addps xmm0, xmm1

					movaps [eax], xmm0

					retn
				}
				feature( AMD3DNow ) {
					if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
						// ecx - left
						// edx - right
						mov eax, [esp + 4] // result
					}
					else {
						mov ecx, [esp + 4]  // left
						mov edx, [esp + 8]  // right
						mov eax, [esp + 12] // result
					}

					movq mm2, [ecx]
					movq mm3, [ecx + 8]
					movq mm4, mm2
					movq mm5, mm3
					movq mm0, [edx]
					punpckhdq mm4, mm2
					punpckhdq mm5, mm3
					movq mm1, [edx + 8]
					punpckldq mm4, mm2
					punpckldq mm5, mm3
					mov eax, [esp + 4]
					movq mm6, mm4
					movq mm7, mm5
					pfmul mm2, mm1
					pxor mm5, [[_changeSign]]
					pfmul mm3, mm0
					pxor mm2, [[_changeSign]]
					pfmul mm5, mm0
					pfmul mm4, mm1
					pfadd mm2, mm3
					movq mm3, [ecx]
					pfsub mm4, mm5
					movq mm5, [ecx + 8]
					pfmul mm7, mm1
					pxor mm6, [[_changeSign]]
					pfacc mm4, mm2
					pfmul mm6, mm0
					pxor mm5, [[_changeSign]]
					pfmul mm3, mm0
					movq [eax], mm4
					pfmul mm5, mm1
					pfadd mm6, mm7
					pfsub mm5, mm3
					pfacc mm6, mm5
					movq [eax + 8], mm6

					femms
					retn
				}
				default {
					if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
						// ecx - left
						// edx - right
						mov eax, [esp + 4] // result
					}
					else {
						mov ecx, [esp + 4]  // left
						mov edx, [esp + 8]  // right
						mov eax, [esp + 12] // result
					}

					fld dword ptr [edx]
					fld dword ptr [edx + 4]
					fld dword ptr [edx + 8]
					fld dword ptr [edx + 12]
					fld st3
					fld st3
					fld st3
					fld dword ptr [ecx + 12]
					fmul st7, st0
					fmul st6, st0
					fmul st5, st0
					fmulp st4, st0
					fld dword ptr [ecx]
					fmul st3, st0
					fmul st2, st0
					fmul st1, st0
					fmul dword ptr [edx + 12]

					faddp st7, st0
					fsubp st5, st0
					faddp st3, st0
					fsubp st1, st0

					fld dword ptr [edx + 8]
					fld dword ptr [edx + 12]
					fld dword ptr [edx]

					fld dword ptr [ecx + 4]

					fmul st3, st0
					fmul st2, st0
					fmul st1, st0
					fmul dword ptr [edx + 4]

					fsubp st4, st0
					fsubp st4, st0
					faddp st4, st0
					faddp st4, st0

					fld dword ptr [edx + 4]
					fld dword ptr [edx]
					fld dword ptr [edx + 12]

					fld dword ptr [ecx + 8]
					fmul st3, st0
					fmul st2, st0
					fmul st1, st0
					fmul dword ptr [edx + 8]

					fsubp st4, st0
					faddp st4, st0
					faddp st4, st0
					fsubp st4, st0

					fstp dword ptr [eax + 12]
					fstp dword ptr [eax + 8]
					fstp dword ptr [eax + 4]
					fstp dword ptr [eax]

					retn
				}
			}
			default {
				return new Quaternion( left.W * right.X + left.X * right.W + left.Y * right.Z - left.Z * right.Y, left.W * right.Y - left.X * right.Z + left.Y * right.W + left.Z * right.X, left.W * right.Z + left.X * right.Y - left.Y * right.X + left.Z * right.W, left.W * right.W - left.X * right.X - left.Y * right.Y - left.Z * right.Z );
			}
		}

		private static float _three = 3f;
		private static float _oneHalf = 0.5f;

		public Quaternion Normalized {
			get asm {
				X86_32 {
					feature( SSE1 ) {
						if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
							// ecx - this
							// edx - result
						}
						else {
							mov ecx, [esp + 4] // this
							mov edx, [esp + 8] // result
						}

						movaps xmm0, [ecx]
						mulps xmm0, xmm0
						xorps xmm2, xmm2
						movhlps xmm2, xmm0
						addps xmm2, xmm0
						movaps xmm3, xmm2
						shufps xmm3, xmm3, 1
						addss xmm2, xmm3

						rsqrtss xmm1, xmm2
						movss xmm4, [[_three]]
						movss xmm3, xmm1
						mulss xmm1, xmm2
						mulss xmm1, xmm3
						mulss xmm3, [[_oneHalf]]
						subss xmm4, xmm1
						mulss xmm4, xmm3

						shufps xmm4, xmm4, 0
						mulps xmm4, [ecx]
						movaps [edx], xmm4

						retn
					}
					feature( AMD3DNow ) {
						if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
							// ecx - this
							// edx - result
						}
						else {
							mov ecx, [esp + 4] // this
							mov edx, [esp + 8] // result
						}

						movq mm0, [ecx]
						movq mm1, [ecx + 8]

						movq mm6, mm0
						movq mm7, mm1

						pfmul mm0, mm0
						pfmul mm1, mm1

						pfadd mm0, mm1
						pfacc mm0, mm0

						pfrsqrt mm1, mm0
						movq mm2, mm1
						pfmul mm1, mm1
						pfrsqit1 mm1, mm0
						pfrcpit2 mm1, mm2

						pfmul mm6, mm1
						pfmul mm7, mm1

						movq [edx], mm6
						movq [edx + 8], mm7

						femms
						retn
					}
				}
				default {
					return this;
				}
			}
		}

		public Quaternion& Normalize() asm {
			X86_32 {
				feature( SSE1 ) {
					if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
						// ecx - this
					}
					else {
						mov ecx, [esp + 4] // this
					}

					movaps xmm0, [ecx]
					mulps xmm0, xmm0
					xorps xmm2, xmm2
					movhlps xmm2, xmm0
					addps xmm2, xmm0
					movaps xmm3, xmm2
					shufps xmm3, xmm3, 1
					addss xmm2, xmm3

					rsqrtss xmm1, xmm2
					movss xmm4, [[_three]]
					movss xmm3, xmm1
					mulss xmm1, xmm2
					mulss xmm1, xmm3
					mulss xmm3, [[_oneHalf]]
					subss xmm4, xmm1
					mulss xmm4, xmm3

					shufps xmm4, xmm4, 0
					mulps xmm4, [ecx]
					movaps [ecx], xmm4

					retn
				}
				feature( AMD3DNow ) {
					if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
						// ecx - this
					}
					else {
						mov ecx, [esp + 4] // this
					}

					movq mm0, [ecx]
					movq mm1, [ecx + 8]

					movq mm6, mm0
					movq mm7, mm1

					pfmul mm0, mm0
					pfmul mm1, mm1

					pfadd mm0, mm1
					pfacc mm0, mm0

					pfrsqrt mm1, mm0
					movq mm2, mm1
					pfmul mm1, mm1
					pfrsqit1 mm1, mm0
					pfrcpit2 mm1, mm2

					pfmul mm6, mm1
					pfmul mm7, mm1

					movq [ecx], mm6
					movq [ecx + 8], mm7

					femms
					retn
				}
			}
			default {
				return this;
			}
		}

		public static float operator |( Quaternion& left, Quaternion& right ) asm {
			X86_32 {
				feature( SSE1 ) {
					if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
						// ecx - left
						// edx - right
					}
					else {
						mov ecx, [esp + 4] // left
						mov edx, [esp + 8] // right
					}

					movaps xmm0, [ecx]
					mulps xmm0, [edx]

					xorps xmm1, xmm1
					movhlps xmm1, xmm0
					addps xmm0, xmm1

					movaps xmm2, xmm0
					shufps xmm2, xmm2, 0b0000001
					addss xmm0, xmm2

					mov eax, [esp]
					movss [esp], xmm0
					fld dword ptr [esp]
					pop ecx

					jmp eax
				}
				feature( AMD3DNow ) {
					if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
						// ecx - left
						// edx - right
					}
					else {
						mov ecx, [esp + 4] // left
						mov edx, [esp + 8] // right
					}

					movq mm0, [ecx]
					movq mm1, [ecx + 8]
					pfmul mm0, [edx]
					pfmul mm1, [edx + 8]
					pfadd mm0, mm1
					mov eax, [esp]
					pfacc mm0, mm0
					movd [esp], mm0
					femms

					fld dword ptr [esp]
					pop ecx

					jmp eax
				}
				default {
					if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
						// ecx - left
						// edx - right
					}
					else {
						mov ecx, [esp + 4] // left
						mov edx, [esp + 8] // right
					}

					fld dword ptr [ecx]
					fmul dword ptr [edx]
					fld dword ptr [ecx + 4]
					fmul dword ptr [edx + 4]
					fld dword ptr [ecx + 8]
					fmul dword ptr [edx + 8]
					fld dword ptr [ecx + 12]
					fmul dword ptr [edx + 12]

					faddp st2, st0
					faddp st2, st0
					faddp st1, st0

					retn
				}
			}
			default {
				return left.X * right.X + left.Y * right.Y + left.Z * right.Z + left.W * right.W;
			}
		}

		[Alignment( Boundary = 16 )]
		static uint[4] _changeXYZSign = new uint[4] { 0x80000000, 0x80000000, 0x80000000, 0 };

		public Quaternion& Invert() asm {
			X86_32 {
				feature( SSE1 ) {
					if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
						// ecx - left
					}
					else {
						mov ecx, [esp + 4] // this
					}

					movaps xmm0, [ecx]
					movaps xmm1, [ecx]
					mulps xmm0, xmm0
					xorps xmm1, [[_changeXYZSign]]
					xorps xmm2, xmm2
					movhlps xmm2, xmm0
					addps xmm0, xmm2
					movaps xmm2, xmm0
					shufps xmm2, xmm2, 1
					addss xmm0, xmm2

					rcpss xmm2, xmm0
					mulss xmm0, xmm2
					mulss xmm0, xmm2
					addss xmm2, xmm2
					subss xmm2, xmm0

					shufps xmm2, xmm2, 0
					mulps xmm1, xmm2

					movaps [ecx], xmm1
					retn
				}
				feature( AMD3DNow ) {
					if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
						// ecx - left
					}
					else {
						mov ecx, [esp + 4] // this
					}

					movq mm0, [ecx]
					movq mm1, [ecx + 8]

					movq mm2, mm0
					movq mm3, mm1

					pfmul mm0, mm0
					pfmul mm1, mm1

					pxor mm2, [[_changeXYZSign]]
					pxor mm3, [[_changeSign]]

					pfadd mm0, mm1
					pfacc mm0, mm0

					pfrcp mm7, mm0
					pfrcpit1 mm0, mm7
					pfrcpit2 mm0, mm7

					pfmul mm2, mm0
					pfmul mm3, mm0

					movq [ecx], mm2
					movq [ecx + 8], mm3

					femms
					retn
				}
			}
			default {
				var length = -1f / LengthSquared;

				X *= length;
				Y *= length;
				Z *= length;
				W *= -length;

				return this;
			}
		}

		public Quaternion Inverted {
			get asm {
				X86_32 {
					feature( SSE1 ) {
						if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
							// ecx - left
							// edx - right
						}
						else {
							mov ecx, [esp + 4] // this
							mov edx, [esp + 8] // result
						}

						movaps xmm0, [ecx]
						movaps xmm1, [ecx]
						mulps xmm0, xmm0
						xorps xmm1, [[_changeXYZSign]]
						xorps xmm2, xmm2
						movhlps xmm2, xmm0
						addps xmm0, xmm2
						movaps xmm2, xmm0
						shufps xmm2, xmm2, 1
						addss xmm0, xmm2

						rcpss xmm2, xmm0
						mulss xmm0, xmm2
						mulss xmm0, xmm2
						addss xmm2, xmm2
						subss xmm2, xmm0

						shufps xmm2, xmm2, 0
						mulps xmm1, xmm2

						movaps [edx], xmm1
						retn
					}
					feature( AMD3DNow ) {
						if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
							// ecx - left
							// edx - right
						}
						else {
							mov ecx, [esp + 4] // this
							mov edx, [esp + 8] // result
						}

						movq mm0, [ecx]
						movq mm1, [ecx + 8]

						movq mm2, mm0
						movq mm3, mm1

						pfmul mm0, mm0
						pfmul mm1, mm1

						pxor mm2, [[_changeXYZSign]]
						pxor mm3, [[_changeSign]]

						pfadd mm0, mm1
						pfacc mm0, mm0

						pfrcp mm7, mm0
						pfrcpit1 mm0, mm7
						pfrcpit2 mm0, mm7

						pfmul mm2, mm0
						pfmul mm3, mm0

						movq [edx], mm2
						movq [edx + 8], mm3

						femms
						retn
					}
				}
				default {
					var length = 1f / LengthSquared;
					return Conjugated * length;
				}
			}
		}

		public Quaternion& Conjugate() asm {
			X86_32 {
				feature( SSE1 ) {
					if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
						// ecx - this
					}
					else {
						mov ecx, [esp + 4] // this
					}

					movaps xmm0, [ecx]
					xorps xmm0, [[_changeXYZSign]]
					movaps [ecx], xmm0
					retn
				}
				feature( MMX ) {
					if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
						// ecx - this
					}
					else {
						mov ecx, [esp + 4] // this
					}

					movq mm0, [ecx]
					movq mm1, [ecx + 8]
					pxor mm0, [[_changeXYZSign]]
					pxor mm1, [[_changeSign]]
					movq [ecx], mm0
					movq [ecx + 8], mm1

					emms
					retn
				}
				default {
					if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
						// ecx - this
					}
					else {
						mov ecx, [esp + 4] // this
					}

					mov edx, 0x80000000

					xor [ecx], edx
					xor [ecx + 4], edx
					xor [ecx + 8], edx

					retn
				}
			}
			default {
				X = -X;
				Y = -Y;
				Z = -Z;

				return this;
			}
		}

		public Quaternion Conjugated {
			get asm {
				X86_32 {
					feature( SSE1 ) {
						if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
							// ecx - this
							// edx - result
						}
						else {
							mov ecx, [esp + 4] // this
							mov edx, [esp + 8] // result
						}

						movaps xmm0, [ecx]
						xorps xmm0, [[_changeXYZSign]]
						movaps [edx], xmm0
						retn
					}
					feature( MMX ) {
						if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
							// ecx - this
							// edx - result
						}
						else {
							mov ecx, [esp + 4] // this
							mov edx, [esp + 8] // result
						}

						movq mm0, [ecx]
						movq mm1, [ecx + 8]
						pxor mm0, [[_changeXYZSign]]
						pxor mm1, [[_changeSign]]
						movq [edx], mm0
						movq [edx + 8], mm1

						emms
						retn
					}
					default {
						if( thismethod.CallingConvention == CallingConvention.FastCallX86 ) {
							// ecx - this
							// edx - result
						}
						else {
							mov ecx, [esp + 4] // this
							mov edx, [esp + 8] // result
						}

						fld dword ptr [ecx]
						fchs
						fld dword ptr [ecx + 4]
						fchs
						fld dword ptr [ecx + 8]
						fchs

						fld dword ptr [ecx + 12]
						fstp dword ptr [edx + 12]

						fstp dword ptr [edx + 8]
						fstp dword ptr [edx + 4]
						fstp dword ptr [edx]

						retn
					}
				}
				default {
					return new Quaternion( -X, -Y, -Z, W );
				}
			}
		}

		public Vector3 XAxis {
			get {
				return new Vector3( 1f - ( Y * ( Y + Y ) + Z * ( Z + Z ) ), X * ( Y + Y ) + W * ( Z + Z ), X * ( Z + Z ) - W * ( Y + Y ) );
			}
		}

		public Vector3 YAxis {
			get {
				return new Vector3( X * ( Y + Y ) - W * ( Z + Z ), 1f - ( X * ( X + X ) + Z * ( Z + Z ) ), Y * ( Z + Z ) + W * ( X + X ) );
			}
		}

		public Vector3 ZAxis {
			get {
				return new Vector3( X * ( Z + Z ) + W * ( Y + Y ), Y * ( Z + Z ) - W * ( X + X ), 1f - ( X * ( X + X ) + Y * ( Y + Y ) ) );
			}
		}

		public Vector3 Transform( Vector3& vector ) {
			Vector3 result;

			result.X = vector.X * ( 1f - ( Y * ( Y + Y ) + Z * ( Z + Z ) ) ) + vector.Y * ( X * ( Y + Y ) - W * ( Z + Z ) ) + vector.Z * ( X * ( Z + Z ) + W * ( Y + Y ) );

			result.Y = vector.X * ( X * ( Y + Y ) + W * ( Z + Z ) ) + vector.Y * ( 1f - ( X * ( X + X ) + Z * ( Z + Z ) ) ) + vector.Z * ( Y * ( Z + Z ) - W * ( X + X ) );

			result.Z = vector.X * ( X * ( Z + Z ) - W * ( Y + Y ) ) + vector.Y * ( Y * ( Z + Z ) + W * ( X + X ) ) + vector.Z * ( 1f - ( X * ( X + X ) + Y * ( Y + Y ) ) );

			return result;
		}

		public float Length {
			get {
				return Math.Sqrt( this | this );
			}
		}

		public float LengthSquared {
			get {
				return this | this;
			}
		}

		public static Quaternion operator *( Quaternion& left, float right ) {
			return new Quaternion( left.X * right, left.Y * right, left.Z * right, left.W * right );
		}

		void IFormattable.ToString( StringBuilder builder, string format ) { builder.AppendFormat( "({0};{1};{2};{3})", X, Y, Z, W ); }

		[UnitTest]
		public static void UnitTest() {
			var q1 = new Quaternion( 1, 2, 3, 4 );
			var q2 = new Quaternion( 3, 1, 2, 2 );

			var q3 = q1 * q2;
		}
	}
}