From b2a33cc72abf47105be554355b245925693a4c05 Mon Sep 17 00:00:00 2001 From: TomAwezome Date: Tue, 18 Jan 2022 00:52:46 -0500 Subject: [PATCH] Integrate HolyMath (from https://github.com/TempleProgramming/HolyMath). Create new folder System/Math/, move System/Math.ZC and System/MathODE.ZC to this new folder. Add HolyMath files to System/Math/. Change System/MakeSystem.ZC to #include System/Math/MakeMath.ZC. --- src/Doc/ChangeLog.DD | 7 + src/Kernel/KGlobals.ZC | 2 +- src/System/MakeSystem.ZC | 3 +- src/System/Math/Conversion.ZC | 89 ++++++ src/System/Math/F32.ZC | 136 +++++++++ src/System/Math/MakeMath.ZC | 20 ++ src/System/Math/Mat4.ZC | 350 +++++++++++++++++++++++ src/System/{ => Math}/Math.ZC | 0 src/System/{ => Math}/MathODE.ZC | 0 src/System/Math/NDArray.ZC | 86 ++++++ src/System/Math/Tests/TestF32.ZC | 54 ++++ src/System/Math/Tests/TestMat4.ZC | 103 +++++++ src/System/Math/Tests/TestVec.ZC | 206 ++++++++++++++ src/System/Math/Types.ZC | 127 +++++++++ src/System/Math/Vec3.ZC | 328 +++++++++++++++++++++ src/System/Math/Vec4.ZC | 458 ++++++++++++++++++++++++++++++ 16 files changed, 1966 insertions(+), 3 deletions(-) create mode 100755 src/System/Math/Conversion.ZC create mode 100755 src/System/Math/F32.ZC create mode 100755 src/System/Math/MakeMath.ZC create mode 100755 src/System/Math/Mat4.ZC rename src/System/{ => Math}/Math.ZC (100%) rename src/System/{ => Math}/MathODE.ZC (100%) create mode 100755 src/System/Math/NDArray.ZC create mode 100755 src/System/Math/Tests/TestF32.ZC create mode 100755 src/System/Math/Tests/TestMat4.ZC create mode 100755 src/System/Math/Tests/TestVec.ZC create mode 100755 src/System/Math/Types.ZC create mode 100755 src/System/Math/Vec3.ZC create mode 100755 src/System/Math/Vec4.ZC diff --git a/src/Doc/ChangeLog.DD b/src/Doc/ChangeLog.DD index 1c36f562..716a80b5 100755 --- a/src/Doc/ChangeLog.DD +++ b/src/Doc/ChangeLog.DD @@ -1,4 +1,11 @@ $WW,1$$FG,5$$TX+CX,"ChangeLog"$$FG$ +$IV,1$----01/18/22 00:35:45----$IV,0$ +* Raised version 
number to 1.09. +* Integrated HolyMath into OS (from https://github.com/TempleProgramming/HolyMath). + - Created new folder System/Math/, moved $LK+PU,"Math",A="FI:::/System/Math/Math.ZC"$ and $LK+PU,"MathODE",A="FI:::/System/Math/MathODE.ZC"$ there. + - Added HolyMath files to System/Math/. + - Changed $LK+PU,"MakeSystem",A="FF:::/System/MakeSystem.ZC,Math/MakeMath"$ to #include $LK+PU,"Math/MakeMath",A="FI:::/System/Math/MakeMath.ZC"$. + $IV,1$----12/30/21 22:08:17----$IV,0$ * Raised version number to 1.08. * Implemented $LK+PU,"WinTileGrid",A="MN:WinTileGrid"$. diff --git a/src/Kernel/KGlobals.ZC b/src/Kernel/KGlobals.ZC index 46278cec..71ba2678 100755 --- a/src/Kernel/KGlobals.ZC +++ b/src/Kernel/KGlobals.ZC @@ -13,7 +13,7 @@ CTask *sys_winmgr_task, U8 *rev_bits_table; //Table with U8 bits reversed CDate local_time_offset; F64 *pow10_I64, - sys_os_version = 1.08; + sys_os_version = 1.09; CAutoCompleteDictGlobals acd; CAutoCompleteGlobals ac; diff --git a/src/System/MakeSystem.ZC b/src/System/MakeSystem.ZC index adeff42e..343a072a 100755 --- a/src/System/MakeSystem.ZC +++ b/src/System/MakeSystem.ZC @@ -1,9 +1,8 @@ Cd(__DIR__);; #include "Externs" -#include "Math" #include "Training" #include "Memory" -#include "MathODE" +#include "Math/MakeMath" #include "Gr/MakeGr" #include "Sound" #include "BlkDev/MakeZBlkDev" diff --git a/src/System/Math/Conversion.ZC b/src/System/Math/Conversion.ZC new file mode 100755 index 00000000..f2d5396f --- /dev/null +++ b/src/System/Math/Conversion.ZC @@ -0,0 +1,89 @@ +asm +{ +_F64_TO_F32:: + PUSH RBP + MOV RBP, RSP + + CVTSD2SS XMM0, SF_ARG1[RBP] + MOVQ RAX, XMM0 + + POP RBP + RET1 8 +} +/** + @ingroup Math + @brief Convert double-precision scalar to single-precision. + + @param[in] d Double-precision scalar. + @return Single-precision scalar. 
+*/
+_extern _F64_TO_F32 U32 F64ToF32(F64 d);
+
+asm
+{
+_F32_TO_F64::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	CVTSS2SD	XMM0, SF_ARG1[RBP]	// XMM0: argument widened from F32 to F64
+	MOVQ	RAX, XMM0	// RAX: raw F64 bits of the result
+
+	POP	RBP
+	RET1	8
+}
+/**
+	@ingroup Math
+	@brief Convert single-precision scalar to double-precision.
+
+	@param[in] f	Single-precision scalar.
+	@return			Double-precision scalar.
+*/
+_extern _F32_TO_F64 F64 F32ToF64(U32 f);
+
+asm
+{
+_F32_RAD_TO_DEG::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOVSS	XMM0, SF_ARG1[RBP]	// XMM0: rad
+	MOV	RAX, F32_180_OVER_PI	// degrees = radians * (180 / pi)
+	MOVQ	XMM1, RAX
+	MULSS	XMM0, XMM1	// XMM0: rad * (180 / pi)
+	MOVQ	RAX, XMM0
+
+	POP	RBP
+	RET1	8
+}
+/**
+	@ingroup Math
+	@brief Convert single-precision radians to single-precision degrees (rad * 180 / pi).
+
+	@param[in] rad	Single-precision angle in radians.
+	@return			Single-precision angle in degrees.
+*/
+_extern _F32_RAD_TO_DEG F32 F32RadToDeg(F32 rad);
+
+asm
+{
+_F32_DEG_TO_RAD::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOVSS	XMM0, SF_ARG1[RBP]	// XMM0: deg
+	MOV	RAX, F32_PI_OVER_180	// radians = degrees * (pi / 180)
+	MOVQ	XMM1, RAX
+	MULSS	XMM0, XMM1	// XMM0: deg * (pi / 180)
+	MOVQ	RAX, XMM0
+
+	POP	RBP
+	RET1	8
+}
+/**
+	@ingroup Math
+	@brief Convert single-precision degrees to single-precision radians (deg * pi / 180).
+
+	@param[in] deg	Single-precision angle in degrees.
+	@return			Single-precision angle in radians.
+*/
+_extern _F32_DEG_TO_RAD F32 F32DegToRad(F32 deg);
diff --git a/src/System/Math/F32.ZC b/src/System/Math/F32.ZC
new file mode 100755
index 00000000..d4d2d8e8
--- /dev/null
+++ b/src/System/Math/F32.ZC
@@ -0,0 +1,136 @@
+
+F64 F64_QUADRANT_CONSTANT_ARRAY[2] = {0, ã};
+
+#define F64_1						0x3FF0000000000000
+#define F64_1_OVER_2_FACTORIAL		0x3FE0000000000000 // 1/2!
+#define F64_1_OVER_4_FACTORIAL		0x3FA5555555555555 // 1/4!
+#define F64_1_OVER_6_FACTORIAL		0x3F56C16C16C16C17 // 1/6!
+#define F64_1_OVER_8_FACTORIAL		0x3EFA01A01A01A01A // 1/8!
+#define F64_1_OVER_10_FACTORIAL		0x3E927E4FB7789F5C // 1/10!
+#define F64_1_OVER_12_FACTORIAL		0x3E21EED8EFF8D898 // 1/12!
+#define F64_1_OVER_14_FACTORIAL		0x3DA93974A8C07C9D // 1/14!
+
+#define F64_2_OVER_PI	0x3FE45F306DC9C883
+#define F64_PI_OVER_2	0x3FF921FB54442D18
+
+asm
+{
+_F32_COS::
+	PUSH	RBP
+	MOV	RBP, RSP
+	PUSH	RBX
+
+	CVTSS2SD	XMM0, SF_ARG1[RBP]	// XMM0: é, widened from F32 to F64
+
+/*
+	I64 quadrant = é * (2.0 / ã);
+*/
+
+	MOV	RAX, F64_2_OVER_PI
+	MOVQ	XMM1, RAX	// XMM1: 2.0 / ã
+	MULSD	XMM1, XMM0	// XMM1: (2.0 / ã) * é
+	CVTSD2SI	RBX, XMM1	// RBX : quadrant = (I64) ((2.0 / ã) * é)
+
+/*
+	é = é - (F64) quadrant * (ã / 2.0);
+*/
+
+	CVTSI2SD	XMM1, RBX	// XMM1: (F64) quadrant
+	MOV	RAX, F64_PI_OVER_2
+	MOVQ	XMM2, RAX	// XMM2: ã / 2.0
+	MULSD	XMM2, XMM1	// XMM2: (ã / 2.0) * (F64) quadrant
+	SUBSD	XMM0, XMM2	// XMM0: é = é - ((ã / 2.0) * (F64) quadrant)
+
+/*
+	quadrant += 1;
+	é = F64_QUADRANT_CONSTANT_ARRAY[(quadrant >> 1) & 1] - é;
+*/
+
+	INC	RBX	// RBX : quadrant += 1
+	SHR	RBX, 1	// RBX : quadrant >> 1
+	AND	RBX, 1	// RBX : (quadrant >> 1) & 1
+	SHL	RBX, 3	// RBX : ((quadrant >> 1) & 1) * sizeof(F64)
+	MOV	RAX, &F64_QUADRANT_CONSTANT_ARRAY
+	ADD	RBX, RAX	// RBX : &constants[(quadrant >> 1) & 1]
+	MOVQ	XMM1, [RBX]	// XMM1: constants[(quadrant >> 1) & 1]
+	SUBSD	XMM1, XMM0	// XMM1: é = constants[(quadrant >> 1) & 1] - é
+
+/*
+	é2 = -(é * é);
+*/
+
+	MULSD	XMM1, XMM1	// XMM1: é * é
+	XORPS	XMM0, XMM0	// XMM0: {0.0, 0.0}
+	SUBSD	XMM0, XMM1	// XMM0: é2 = -(é * é)
+
+/*
+	F64 r = (1/14!) * é2;
+*/
+
+	MOV	RAX, F64_1_OVER_14_FACTORIAL
+	MOVQ	XMM1, RAX	// XMM1: (1/14!)
+	MULSD	XMM1, XMM0	// XMM1: r = (1/14!) * é2
+
+/*
+	Horner steps, for n = 12, 10, 8, 6, 4, 2:
+	r += (1/n!);	r *= é2;
+*/
+
+	MOV	RAX, F64_1_OVER_12_FACTORIAL
+	MOVQ	XMM2, RAX	// XMM2: (1/12!)
+	ADDSD	XMM1, XMM2	// XMM1: r += (1/12!)
+	MULSD	XMM1, XMM0	// XMM1: r *= é2
+
+	MOV	RAX, F64_1_OVER_10_FACTORIAL
+	MOVQ	XMM2, RAX	// XMM2: (1/10!)
+	ADDSD	XMM1, XMM2	// XMM1: r += (1/10!)
+	MULSD	XMM1, XMM0	// XMM1: r *= é2
+
+	MOV	RAX, F64_1_OVER_8_FACTORIAL
+	MOVQ	XMM2, RAX	// XMM2: (1/8!)
+	ADDSD	XMM1, XMM2	// XMM1: r += (1/8!)
+	MULSD	XMM1, XMM0	// XMM1: r *= é2
+
+	MOV	RAX, F64_1_OVER_6_FACTORIAL
+	MOVQ	XMM2, RAX	// XMM2: (1/6!)
+	ADDSD	XMM1, XMM2	// XMM1: r += (1/6!)
+	MULSD	XMM1, XMM0	// XMM1: r *= é2
+
+	MOV	RAX, F64_1_OVER_4_FACTORIAL
+	MOVQ	XMM2, RAX	// XMM2: (1/4!)
+	ADDSD	XMM1, XMM2	// XMM1: r += (1/4!)
+	MULSD	XMM1, XMM0	// XMM1: r *= é2
+
+	MOV	RAX, F64_1_OVER_2_FACTORIAL
+	MOVQ	XMM2, RAX	// XMM2: (1/2!)
+	ADDSD	XMM1, XMM2	// XMM1: r += (1/2!)
+	MULSD	XMM1, XMM0	// XMM1: r *= é2
+
+/*
+	return (F32) (r + 1.0)
+*/
+
+	MOV	RAX, F64_1
+	MOVQ	XMM2, RAX	// XMM2: 1.0
+	ADDSD	XMM1, XMM2	// XMM1: r += 1.0
+	CVTSD2SS	XMM0, XMM1	// XMM0: (F32) r
+	MOVQ	RAX, XMM0	// RAX : F32 result bits
+
+	POP	RBX
+	POP	RBP
+	RET1	8
+}
+/**
+	@ingroup Math
+	@brief Calculate single precision cosine.
+
+	Note that it isn't correct for all values of é yet.
+
+	Based on the paper:
+		Fast Trigonometric Functions using Intel's SSE2 Instructions. 2003.
+		L. Nyland, M. Snyder
+
+	@param[in] theta	Angle in radians.
+	@return				cos(theta).
+*/
+_extern _F32_COS F32 CosF32(F32 theta);
\ No newline at end of file
diff --git a/src/System/Math/MakeMath.ZC b/src/System/Math/MakeMath.ZC
new file mode 100755
index 00000000..ed3b4cd7
--- /dev/null
+++ b/src/System/Math/MakeMath.ZC
@@ -0,0 +1,20 @@
+Cd(__DIR__);;
+#include "Math"
+#include "MathODE"
+
+Option(OPTf_WARN_PAREN, ON);
+Option(OPTf_WARN_DUP_TYPES, ON);
+Option(OPTf_WARN_UNUSED_VAR, OFF); // Assembly pulls args from the stack
+                                   // instead of using argument identifiers.
+#include "Types"
+#include "Conversion"
+#include "F32"
+#include "Vec4"
+#include "Vec3"
+#include "Mat4"
+
+#define HOLYMATH_COMPILED
+
+Option(OPTf_WARN_UNUSED_VAR, ON);
+
+Cd("..");;
\ No newline at end of file
diff --git a/src/System/Math/Mat4.ZC b/src/System/Math/Mat4.ZC
new file mode 100755
index 00000000..9be26afa
--- /dev/null
+++ b/src/System/Math/Mat4.ZC
@@ -0,0 +1,350 @@
+
+/**
+	@ingroup Math
+	@brief Print 4x4 matrix.
+
+	@param[in] m	4x4 matrix.
+*/ +U0 Matrix4DPrint(CMatrix4D *m) +{ + U8 reg R14 str = "%n\t%n\t%n\t%n\n\n"; +asm +{ + PUSH R15 + + XOR R15, R15 // = i = 0 +// for (RBX = 0; RBX < 4; RBX++) +@@05: + MOV RAX, R15 // = i + SHL RAX, 2 // = i * 4 + ADD RAX, SF_ARG1[RBP] // = &m + i * 4 + + SUB RSP, 32 + + CVTSS2SD XMM0, 48[RAX] + MOVSD_SSE 24[RSP], XMM0 + + CVTSS2SD XMM0, 32[RAX] + MOVSD_SSE 16[RSP], XMM0 + + CVTSS2SD XMM0, 16[RAX] + MOVSD_SSE 8[RSP], XMM0 + + CVTSS2SD XMM0, [RAX] + MOVSD_SSE [RSP], XMM0 + + PUSH 4 + PUSH R14 + CALL &Print + ADD RSP, 48 + + INC R15 + CMP R15, 4 + JNE @@05 + + POP R15 +} +} + +/** + @ingroup Math + @brief Check if two 4x4 matrices are equal. + + @param[in] a Matrix 1 + @param[in] b Matrix 2 +*/ +Bool Matrix4DIsEqual(CMatrix4D *a, CMatrix4D *b) +{ + I64 i, j, + total = 0; + + for (i = 0; i < 4; i++) + { + total += Vector4DIsEqual(&a->vec[i], &b->vec[i]); + } + + if (total == 4) + return TRUE; + else + return FALSE; +} + +asm +{ +_MATRIX_4D_MUL_VECTOR_4D:: + PUSH RBP + MOV RBP, RSP + + MOV RAX, SF_ARG2[RBP] + MOVAPS XMM0, [RAX] + MOVAPS XMM1, XMM0 + MOVAPS XMM2, XMM0 + MOVAPS XMM3, XMM0 + SHUFPS XMM0, XMM0, 0x00 // (0, 0, 0, 0) + SHUFPS XMM1, XMM1, 0x55 // (1, 1, 1, 1) + SHUFPS XMM2, XMM2, 0xAA // (2, 2, 2, 2) + SHUFPS XMM3, XMM3, 0xFF // (3, 3, 3, 3) + + MOV RAX, SF_ARG1[RBP] + MOVAPS XMM4, [RAX] + MOVAPS XMM5, 16[RAX] + MOVAPS XMM6, 32[RAX] + MOVAPS XMM7, 48[RAX] + + MULPS XMM4, XMM0 + MULPS XMM5, XMM1 + MULPS XMM6, XMM2 + MULPS XMM7, XMM3 + + ADDPS XMM4, XMM5 + ADDPS XMM6, XMM7 + ADDPS XMM4, XMM6 + + MOV RAX, SF_ARG3[RBP] + MOVAPS [RAX], XMM4 + + POP RBP + RET1 24 +} +/** + @ingroup Math + @brief Multiply 4x4 matrix by 4D vector. + + @param[in] m 4x4 matrix. + @param[in] v 4D vector. + @param[in,out] dest Destination 4D vector. 
+*/ +_extern _MATRIX_4D_MUL_VECTOR_4D U0 Matrix4DMulVector4D(CMatrix4D *m, CVector4D *v, CVector4D *dest); + +asm +{ +_MATRIX_4D_MUL:: + PUSH RBP + MOV RBP, RSP + PUSH R14 + PUSH R15 + + MOV RAX, SF_ARG1[RBP] + MOV R14, SF_ARG2[RBP] + MOV R15, SF_ARG3[RBP] + + MOVAPS XMM4, [RAX] + MOVAPS XMM5, 16[RAX] + MOVAPS XMM6, 32[RAX] + MOVAPS XMM7, 48[RAX] + + MOV RAX, 4 // = i = 4 +@@05: + MOVAPS XMM0, [R14] + MOVAPS XMM1, XMM0 + MOVAPS XMM2, XMM0 + MOVAPS XMM3, XMM0 + SHUFPS XMM0, XMM0, 0x00 // (0, 0, 0, 0) + SHUFPS XMM1, XMM1, 0x55 // (1, 1, 1, 1) + SHUFPS XMM2, XMM2, 0xAA // (2, 2, 2, 2) + SHUFPS XMM3, XMM3, 0xFF // (3, 3, 3, 3) + + MULPS XMM0, XMM4 + MULPS XMM1, XMM5 + MULPS XMM2, XMM6 + MULPS XMM3, XMM7 + + ADDPS XMM0, XMM1 + ADDPS XMM2, XMM3 + ADDPS XMM0, XMM2 + + MOVAPS [R15], XMM0 + + ADD R14, 16 + ADD R15, 16 + + DEC RAX + JNZ @@05 + + POP R15 + POP R14 + POP RBP + RET1 24 +} +/** + @ingroup Math + @brief Multiply 4x4 matrix by 4x4 matrix. + + @param[in] a 4x4 matrix. + @param[in] b 4x4 matrix. + @param[in,out] dest Destination 4x4 matrix. +*/ +_extern _MATRIX_4D_MUL U0 Matrix4DMul(CMatrix4D *a, CMatrix4D *b, CMatrix4D *dest); + +asm +{ +_MATRIX_4D_TRANSPOSE:: + PUSH RBP + MOV RBP, RSP + + MOV RAX, SF_ARG1[RBP] + MOVAPS XMM0, [RAX] + MOVAPS XMM1, XMM0 + MOVAPS XMM2, 16[RAX] + + MOVAPS XMM3, 32[RAX] + MOVAPS XMM4, XMM3 + MOVAPS XMM5, 48[RAX] + + SHUFPS XMM0, XMM2, 0x44 // (0, 1, 0, 1) TMP 0 + SHUFPS XMM1, XMM2, 0xEE // (2, 3, 2, 3) TMP 2 + SHUFPS XMM3, XMM5, 0x44 // (0, 1, 0, 1) TMP 1 + SHUFPS XMM4, XMM5, 0xEE // (2, 3, 2, 3) TMP 3 + + MOVAPS XMM2, XMM0 // XMM2 = TMP 0 + MOVAPS XMM5, XMM1 // XMM5 = TMP 2 + + SHUFPS XMM0, XMM3, 0x88 // (0, 2, 0, 2) + SHUFPS XMM2, XMM3, 0xDD // (1, 3, 1, 3) + SHUFPS XMM1, XMM4, 0x88 // (0, 2, 0, 2) + SHUFPS XMM5, XMM4, 0xDD // (1, 3, 1, 3) + + MOV RAX, SF_ARG2[RBP] + MOVAPS [RAX], XMM0 + MOVAPS 16[RAX], XMM2 + MOVAPS 32[RAX], XMM1 + MOVAPS 48[RAX], XMM5 + + POP RBP + RET1 16 +} +/** + @ingroup Math + @brief Transpose 4x4 matrix. 
+ + @param[in] m 4x4 matrix. + @param[in,out] dest Destination 4x4 matrix. +*/ +_extern _MATRIX_4D_TRANSPOSE U0 Matrix4DTranspose(CMatrix4D *m, CMatrix4D *dest); + +/** + @ingroup Math + @brief Clear 4x4 matrix and set it to translation transformation. + + @param[in] x X translation. + @param[in] y Y translation. + @param[in] z Z translation. + @param[in,out] dest Destination 4x4 matrix. +*/ +U0 Matrix4DTranslationSet(F32 x, F32 y, F32 z, CMatrix4D *dest) +{ + dest->e[MAT4_00] = F32_ONE; + dest->e[MAT4_10] = F32_ZERO; + dest->e[MAT4_20] = F32_ZERO; + dest->e[MAT4_30] = F32_ZERO; + + dest->e[MAT4_01] = F32_ZERO; + dest->e[MAT4_11] = F32_ONE; + dest->e[MAT4_21] = F32_ZERO; + dest->e[MAT4_31] = F32_ZERO; + + dest->e[MAT4_02] = F32_ZERO; + dest->e[MAT4_12] = F32_ZERO; + dest->e[MAT4_22] = F32_ONE; + dest->e[MAT4_32] = F32_ZERO; + + dest->e[MAT4_03] = x; + dest->e[MAT4_13] = y; + dest->e[MAT4_23] = z; + dest->e[MAT4_33] = F32_ONE; +} + +/** + @ingroup Math + @brief Clear 4x4 matrix and set it to scale transformation. + + @param[in] x X scale. + @param[in] y Y scale. + @param[in] z Z scale. + @param[in,out] dest Destination 4x4 matrix. +*/ +U0 Matrix4DScaleSet(F32 x, F32 y, F32 z, CMatrix4D *dest) +{ + dest->e[MAT4_00] = x; + dest->e[MAT4_10] = F32_ZERO; + dest->e[MAT4_20] = F32_ZERO; + dest->e[MAT4_30] = F32_ZERO; + + dest->e[MAT4_01] = F32_ZERO; + dest->e[MAT4_11] = y; + dest->e[MAT4_21] = F32_ZERO; + dest->e[MAT4_31] = F32_ZERO; + + dest->e[MAT4_02] = F32_ZERO; + dest->e[MAT4_12] = F32_ZERO; + dest->e[MAT4_22] = z; + dest->e[MAT4_32] = F32_ZERO; + + dest->e[MAT4_03] = F32_ZERO; + dest->e[MAT4_13] = F32_ZERO; + dest->e[MAT4_23] = F32_ZERO; + dest->e[MAT4_33] = F32_ONE; +} + +/** + @ingroup Math + @brief Clear 4x4 matrix and set it to a look-at transform. 
+ + Ú ¿ Ú ¿ Ú ¿ + ³Rx Ry Rz 0³ ³1 0 0 -Ex³ ³Rx Ry Rz -(R ù E)³ + LookAt = ³Ux Uy Uz 0³ * ³0 1 0 -Ey³ = ³Ux Uy Ut -(U ù E)³ + ³-Fx -Fy -Fz 0³ ³0 0 1 -Ez³ ³Fx Fy Fz F ù E ³ + ³0 0 0 1³ ³0 0 0 1 ³ ³0 0 0 1 ³ + À Ù À Ù À Ù + Where: + ùR is the eye right direction. + ùU is the eye up direction. + ùF is the eye forward direction (away from target towards viewer). + ùE is the position of the eye. + + @param[in] eye Position of eye. + @param[in] center Position to look at. + @param[in] up Up direction vector. Can be general like an axis, + does not need to be orthographic in relation + to camera angle. + @param[in,out] dest Destination 4x4 matrix. +*/ +U0 Matrix4DLookAtSet(CVector3D *eye, CVector3D *center, CVector3D *up, CMatrix4D *dest) +{ + Vector3DSub(center, eye, gVec4Temp0); // Forward vector + Vector3DNormalize(gVec4Temp0, gVec4Temp0); + + Vector3DCross(gVec4Temp0, up, gVec4Temp1); // Right vector + Vector3DNormalize(gVec4Temp1, gVec4Temp1); + + Vector3DCross(gVec4Temp1, gVec4Temp0, gVec4Temp2); // Correct Up vector + + // Use dot products to apply eye translation matrix without a 4x4 multiply. 
+ dest->e[MAT4_03] = Vector3DDot(gVec4Temp1, eye) ^ F32_NEGATE_MASK; + dest->e[MAT4_13] = Vector3DDot(gVec4Temp2, eye) ^ F32_NEGATE_MASK; + dest->e[MAT4_23] = Vector3DDot(gVec4Temp0, eye); + dest->e[MAT4_33] = F32_ONE; + + Vector3DNegate(gVec4Temp0, gVec4Temp0); // Forward = -Forward + + dest->e[MAT4_00] = gVec4Temp1->x; // Right + dest->e[MAT4_10] = gVec4Temp2->x; // Up + dest->e[MAT4_20] = gVec4Temp0->x; // Forward + dest->e[MAT4_30] = F32_ZERO; + + dest->e[MAT4_01] = gVec4Temp1->y; + dest->e[MAT4_11] = gVec4Temp2->y; + dest->e[MAT4_21] = gVec4Temp0->y; + dest->e[MAT4_31] = F32_ZERO; + + dest->e[MAT4_02] = gVec4Temp1->z; + dest->e[MAT4_12] = gVec4Temp2->z; + dest->e[MAT4_22] = gVec4Temp0->z; + dest->e[MAT4_32] = F32_ZERO; +} + + + + + + diff --git a/src/System/Math.ZC b/src/System/Math/Math.ZC similarity index 100% rename from src/System/Math.ZC rename to src/System/Math/Math.ZC diff --git a/src/System/MathODE.ZC b/src/System/Math/MathODE.ZC similarity index 100% rename from src/System/MathODE.ZC rename to src/System/Math/MathODE.ZC diff --git a/src/System/Math/NDArray.ZC b/src/System/Math/NDArray.ZC new file mode 100755 index 00000000..e8e0afca --- /dev/null +++ b/src/System/Math/NDArray.ZC @@ -0,0 +1,86 @@ + +/** + @ingroup Math + @brief Initialize n-dimensional array. + + @param[in,out] arr Array array to initialize. + @param[in] type Type of Array, see NDARRAY_ macros. + @param[in] clear Whether the clear the initialized array with zeros. + @param[in] ... List of lengths for each dimension. +*/ +U0 ArrayInit(CArray *arr, U16 type, Bool clear, ...) 
+{
+	I64 i;
+
+	arr->dims = argc;	// One variadic argument per dimension.
+	arr->dimLens = MAlloc(sizeof(U64) * argc);
+	arr->type = type;	// Stored so ArrayFree can pick the matching buffer.
+	arr->size = 1;
+
+	for (i = 0; i < argc; i++)
+	{
+		arr->dimLens[i] = argv[i];
+		arr->size *= argv[i];	// Total elements = product of all dimension lengths.
+	}
+
+	switch (type)
+	{	// Only the buffer matching 'type' is allocated; zero-filled only when 'clear' is set.
+		case NDARRAY_I32:
+			if (clear)
+				arr->I32Buf = CAllocAligned(sizeof(I32) * arr->size, 16);
+			else
+				arr->I32Buf = MAllocAligned(sizeof(I32) * arr->size, 16);
+			break;
+
+		case NDARRAY_I64:
+			if (clear)
+				arr->I64Buf = CAllocAligned(sizeof(I64) * arr->size, 16);
+			else
+				arr->I64Buf = MAllocAligned(sizeof(I64) * arr->size, 16);
+			break;
+
+		case NDARRAY_F32:
+			if (clear)
+				arr->F32Buf = CAllocAligned(sizeof(F32) * arr->size, 16);
+			else
+				arr->F32Buf = MAllocAligned(sizeof(F32) * arr->size, 16);
+			break;
+
+		case NDARRAY_F64:
+			if (clear)
+				arr->F64Buf = CAllocAligned(sizeof(F64) * arr->size, 16);
+			else
+				arr->F64Buf = MAllocAligned(sizeof(F64) * arr->size, 16);
+			break;
+	}
+}
+
+/**
+	@ingroup Math
+	@brief Free n-dimensional array: its dimension-length list and the buffer selected by arr->type.
+
+	@param[in,out] arr	Array to free. The CArray struct itself is not freed.
+*/
+U0 ArrayFree(CArray *arr)
+{
+	Free(arr->dimLens);
+
+	switch (arr->type)
+	{
+		case NDARRAY_I32:
+			Free(arr->I32Buf);
+			break;
+
+		case NDARRAY_I64:
+			Free(arr->I64Buf);
+			break;
+
+		case NDARRAY_F32:
+			Free(arr->F32Buf);
+			break;
+
+		case NDARRAY_F64:
+			Free(arr->F64Buf);
+			break;
+	}
+}
diff --git a/src/System/Math/Tests/TestF32.ZC b/src/System/Math/Tests/TestF32.ZC
new file mode 100755
index 00000000..ac628a5e
--- /dev/null
+++ b/src/System/Math/Tests/TestF32.ZC
@@ -0,0 +1,54 @@
+
+Cd(__DIR__);;
+
+//-----------------------------------------------------------------------------
+// CosF32
+/* This test depends on the HolyGL library for graphing.
*/ + +F32 q0 = CosF32(F64ToF32(0.785398)); +F32 q1 = CosF32(F64ToF32(2.35619)); +F32 q2 = CosF32(F64ToF32(3.92699)); +F32 q3 = CosF32(F64ToF32(5.49779)); +"Cos(45deg) CosF32: %n | Cos: %n\n", F32ToF64(q0), Cos(0.785398); +"Cos(135deg) CosF32: %n | Cos: %n\n", F32ToF64(q1), Cos(2.35619); +"Cos(225deg) CosF32: %n | Cos: %n\n", F32ToF64(q2), Cos(3.92699); +"Cos(315deg) CosF32: %n | Cos: %n\n", F32ToF64(q3), Cos(5.49779); + +CGLTex2D graph; +GLTex2DInit(&graph, GL_TEX2D_RAW, 600, 512); +GLTex2DColorFill(&graph, 0xFFFFFF); +GLBindColorTarget(&graph); + +F64 temp = (ã / 2) * 100.0; +GLDrawLine(temp, 0, temp, 512, 0x00FF00); + +F64 temp = (2 * ã / 2) * 100.0; +GLDrawLine(temp, 0, temp, 512, 0x00FF00); + +F64 temp = (3 * ã / 2) * 100.0; +GLDrawLine(temp, 0, temp, 512, 0x00FF00); + +I64 i; +F64 angle; +F64 result; +for (i = 0; i < 600; i++) +{ + angle = i * 0.01; + + result = Cos(angle); + result *= -100.0; + result += 256; + GLDrawPixel(i, result, 0xFF0000); + + result = F32ToF64(CosF32(F64ToF32(angle))); + result *= -100.0; + result += 256; + GLDrawPixel(i, result, 0x0000FF); +} + +while (CharScan() == 0) +{ + GLTex2DDebugDisp(&graph, 16, 16); + Sleep(1); +} +//----------------------------------------------------------------------------- \ No newline at end of file diff --git a/src/System/Math/Tests/TestMat4.ZC b/src/System/Math/Tests/TestMat4.ZC new file mode 100755 index 00000000..9b019fb7 --- /dev/null +++ b/src/System/Math/Tests/TestMat4.ZC @@ -0,0 +1,103 @@ +Cd(__DIR__);; + +#define ARCH_SUPPORTS_AVX FALSE +#define ARCH_SUPPORTS_AVX2 TRUE + +F32 s; +CVector4D *v0 = MAllocAligned(sizeof(CVector4D), 16); +CVector4D *v1 = MAllocAligned(sizeof(CVector4D), 16); +CVector4D *v2 = MAllocAligned(sizeof(CVector4D), 16); +CMatrix4D *m1 = MAllocAligned(sizeof(CMatrix4D), 16); +CMatrix4D *m2 = MAllocAligned(sizeof(CMatrix4D), 16); + +CMatrix4D *destMat = MAllocAligned(sizeof(CMatrix4D), 16); +CVector4D *destVec = MAllocAligned(sizeof(CVector4D), 16); + +CMatrix4D *trueMatRes = 
MAllocAligned(sizeof(CMatrix4D), 16); +CVector4D *trueVecRes = MAllocAligned(sizeof(CVector4D), 16); + +F32 theta; + +CVector4D *countVec = MAllocAligned(sizeof(CVector4D), 16); +Vector4DInit(1.0, 2.0, 3.0, 4.0, countVec); + +CMatrix4D *countMat = MAllocAligned(sizeof(CMatrix4D), 16); +Vector4DInit(1.0, 2.0, 3.0, 4.0, &countMat->vec[0]); +Vector4DInit(5.0, 6.0, 7.0, 8.0, &countMat->vec[1]); +Vector4DInit(9.0, 10.0, 11.0, 12.0, &countMat->vec[2]); +Vector4DInit(13.0, 14.0, 15.0, 16.0, &countMat->vec[3]); + +//----------------------------------------------------------------------------- +// Matrix4DMulVector4D + +Vector4DInit(90.0, 100.0, 110.0, 120.0, trueVecRes); +Matrix4DMulVector4D(countMat, countVec, destVec); +if (!Vector4DIsEqual(destVec, trueVecRes)) +{ + ST_WARN_ST "Matrix4DMulVector4D NOT WORKING CORRECTLY! RESULT VECTOR:\n"; + Vector4DPrint(destVec); +} else "$$GREEN$$PASS: Matrix4DMulVector4D $$FG$$\n"; +//----------------------------------------------------------------------------- +// Matrix4DMul + +Vector4DInit(90.0, 100.0, 110.0, 120.0, &trueMatRes->vec[0]); +Vector4DInit(202.0, 228.0, 254.0, 280.0, &trueMatRes->vec[1]); +Vector4DInit(314.0, 356.0, 398.0, 440.0, &trueMatRes->vec[2]); +Vector4DInit(426.0, 484.0, 542.0, 600.0, &trueMatRes->vec[3]); +Matrix4DMul(countMat, countMat, destMat); +if (!Matrix4DIsEqual(destMat, trueMatRes)) +{ + ST_WARN_ST "Matrix4DMul NOT WORKING CORRECTLY! RESULT MATRIX:\n"; + Matrix4DPrint(destMat); +} else "$$GREEN$$PASS: Matrix4DMul $$FG$$\n"; +//----------------------------------------------------------------------------- +// Matrix4DTranspose + +Vector4DInit(1.0, 5.0, 9.0, 13.0, &trueMatRes->vec[0]); +Vector4DInit(2.0, 6.0, 10.0, 14.0, &trueMatRes->vec[1]); +Vector4DInit(3.0, 7.0, 11.0, 15.0, &trueMatRes->vec[2]); +Vector4DInit(4.0, 8.0, 12.0, 16.0, &trueMatRes->vec[3]); +Matrix4DTranspose(countMat, destMat); +if (!Matrix4DIsEqual(destMat, trueMatRes)) +{ + ST_WARN_ST "Matrix4DTranspose NOT WORKING CORRECTLY! 
RESULT MATRIX:\n"; + Matrix4DPrint(destMat); +} else "$$GREEN$$PASS: Matrix4DTranspose $$FG$$\n"; +//----------------------------------------------------------------------------- +// Matrix4DTranslationSet + +Vector4DInit(1.0, 0.0, 0.0, 0.0, &trueMatRes->vec[0]); +Vector4DInit(0.0, 1.0, 0.0, 0.0, &trueMatRes->vec[1]); +Vector4DInit(0.0, 0.0, 1.0, 0.0, &trueMatRes->vec[2]); +Vector4DInit(2.0, 3.0, 4.0, 1.0, &trueMatRes->vec[3]); +Matrix4DTranslationSet(F64ToF32(2.0), F64ToF32(3.0), F64ToF32(4.0), destMat); +if (!Matrix4DIsEqual(destMat, trueMatRes)) +{ + ST_WARN_ST "Matrix4DTranslationSet NOT WORKING CORRECTLY! RESULT MATRIX:\n"; + Matrix4DPrint(destMat); +} else "$$GREEN$$PASS: Matrix4DTranslationSet $$FG$$\n"; +//----------------------------------------------------------------------------- +// Matrix4DScaleSet + +Vector4DInit(2.0, 0.0, 0.0, 0.0, &trueMatRes->vec[0]); +Vector4DInit(0.0, 3.0, 0.0, 0.0, &trueMatRes->vec[1]); +Vector4DInit(0.0, 0.0, 4.0, 0.0, &trueMatRes->vec[2]); +Vector4DInit(0.0, 0.0, 0.0, 1.0, &trueMatRes->vec[3]); +Matrix4DScaleSet(F64ToF32(2.0), F64ToF32(3.0), F64ToF32(4.0), destMat); +if (!Matrix4DIsEqual(destMat, trueMatRes)) +{ + ST_WARN_ST "Matrix4DScaleSet NOT WORKING CORRECTLY! 
RESULT MATRIX:\n"; + Matrix4DPrint(destMat); +} else "$$GREEN$$PASS: Matrix4DScaleSet $$FG$$\n"; +//----------------------------------------------------------------------------- +// Matrix4DRotationSet + +//----------------------------------------------------------------------------- +// Matrix4DLookAtSet + +Vector3DInit(2.0, 3.0, 4.0, v0); // Eye +Vector3DInit(1.0, -2.0, -3.0, v1); // Center +Vector3DInit(0.0, 0.0, 1.0, v2); // Up +Matrix4DLookAtSet(v0, v1, v2, destMat); +Matrix4DPrint(destMat); +//----------------------------------------------------------------------------- \ No newline at end of file diff --git a/src/System/Math/Tests/TestVec.ZC b/src/System/Math/Tests/TestVec.ZC new file mode 100755 index 00000000..ad6ffe01 --- /dev/null +++ b/src/System/Math/Tests/TestVec.ZC @@ -0,0 +1,206 @@ +Cd(__DIR__);; + +#define ARCH_SUPPORTS_AVX TRUE +#define ARCH_SUPPORTS_AVX2 TRUE + +F32 s; +CVector4D *a = MAllocAligned(sizeof(CVector4D), 16); +CVector4D *b = MAllocAligned(sizeof(CVector4D), 16); +CVector4D *dest = MAllocAligned(sizeof(CVector4D), 16); +I64 destS; +CVector4D *trueRes = MAllocAligned(sizeof(CVector4D), 16); + +// Note that some smaller vector functions are identical to larger vector +// functions so they do not get tested. + +//----------------------------------------------------------------------------- +// Vector4DCopy + +Vector4DInit(1.25, 2.5, 3.75, 5.0, a); +Vector4DInit(0.0, 0.0, 0.0, 0.0, dest); +Vector4DCopy(a, dest); +if (!Vector4DIsEqual(a, dest)) +{ + ST_WARN_ST "Vector4DCopy NOT WORKING CORRECTLY! RESULT VECTOR:\n"; + Vector4DPrint(dest); +} else "$$GREEN$$PASS: Vector4DCopy $$FG$$\n"; +//----------------------------------------------------------------------------- +// Vector4DAdd + +Vector4DInit(1.25, -2.5, 3.75, -5.0, a); +Vector4DInit(-1.0, 2.0, -3.0, 4.0, b); +Vector4DInit(0.25, -0.5, 0.75, -1.0, trueRes); +Vector4DAdd(a, b, dest); +if (!Vector4DIsEqual(dest, trueRes)) +{ + ST_WARN_ST "Vector4DAdd NOT WORKING CORRECTLY! 
RESULT VECTOR:\n"; + Vector4DPrint(dest); +} else "$$GREEN$$PASS: Vector4DAdd $$FG$$\n"; +//----------------------------------------------------------------------------- +// Vector4DAddS + +Vector4DInit(1.25, -2.5, 3.75, -5.0, a); +s = F64ToF32(2.0); +Vector4DInit(3.25, -0.5, 5.75, -3.0, trueRes); +Vector4DAddS(a, s, dest); +if (!Vector4DIsEqual(dest, trueRes)) +{ + ST_WARN_ST "Vector4DAddS NOT WORKING CORRECTLY! RESULT VECTOR:\n"; + Vector4DPrint(dest); +} else "$$GREEN$$PASS: Vector4DAddS $$FG$$\n"; +//----------------------------------------------------------------------------- +// Vector4DSub + +Vector4DInit(1.25, -2.5, 3.75, -5.0, a); +Vector4DInit(-1.0, 2.0, -3.0, 4.0, b); +Vector4DInit(2.25, -4.5, 6.75, -9.0, trueRes); +Vector4DSub(a, b, dest); +if (!Vector4DIsEqual(dest, trueRes)) +{ + ST_WARN_ST "Vector4DSub NOT WORKING CORRECTLY! RESULT VECTOR:\n"; + Vector4DPrint(dest); +} else "$$GREEN$$PASS: Vector4DSub $$FG$$\n"; +//----------------------------------------------------------------------------- +// Vector4DSubS + +Vector4DInit(1.25, -2.5, 3.75, -5.0, a); +s = F64ToF32(2.0); +Vector4DInit(-0.75, -4.5, 1.75, -7.0, trueRes); +Vector4DSubS(a, s, dest); +if (!Vector4DIsEqual(dest, trueRes)) +{ + ST_WARN_ST "Vector4DSubS NOT WORKING CORRECTLY! RESULT VECTOR:\n"; + Vector4DPrint(dest); +} else "$$GREEN$$PASS: Vector4DSubS $$FG$$\n"; +//----------------------------------------------------------------------------- +// Vector4DMul + +Vector4DInit(1.25, -2.5, 3.75, -5.0, a); +Vector4DInit(-1.0, 2.0, -3.0, 4.0, b); +Vector4DInit(-1.25, -5.0, -11.25, -20.0, trueRes); +Vector4DMul(a, b, dest); +if (!Vector4DIsEqual(dest, trueRes)) +{ + ST_WARN_ST "Vector4DMul NOT WORKING CORRECTLY! 
RESULT VECTOR:\n"; + Vector4DPrint(dest); +} else "$$GREEN$$PASS: Vector4DMul $$FG$$\n"; +//----------------------------------------------------------------------------- +// Vector4DMulS + +Vector4DInit(1.25, -2.5, 3.75, -5.0, a); +s = F64ToF32(2.0); +Vector4DInit(2.5, -5.0, 7.5, -10.0, trueRes); +Vector4DMulS(a, s, dest); +if (!Vector4DIsEqual(dest, trueRes)) +{ + ST_WARN_ST "Vector4DMulS NOT WORKING CORRECTLY! RESULT VECTOR:\n"; + Vector4DPrint(dest); +} else "$$GREEN$$PASS: Vector4DMulS $$FG$$\n"; +//----------------------------------------------------------------------------- +// Vector4DDiv + +Vector4DInit(1.25, -2.5, 3.75, -5.0, a); +Vector4DInit(-1.0, 2.0, -3.0, 4.0, b); +Vector4DInit(-1.25, -1.25, -1.25, -1.25, trueRes); +Vector4DDiv(a, b, dest); +if (!Vector4DIsEqual(dest, trueRes)) +{ + ST_WARN_ST "Vector4DDiv NOT WORKING CORRECTLY! RESULT VECTOR:\n"; + Vector4DPrint(dest); +} else "$$GREEN$$PASS: Vector4DDiv $$FG$$\n"; +//----------------------------------------------------------------------------- +// Vector4DDivS + +Vector4DInit(1.25, -2.5, 3.75, -5.0, a); +s = F64ToF32(2.0); +Vector4DInit(0.625, -1.25, 1.875, -2.5, trueRes); +Vector4DDivS(a, s, dest); +if (!Vector4DIsEqual(dest, trueRes)) +{ + ST_WARN_ST "Vector4DDivS NOT WORKING CORRECTLY! RESULT VECTOR:\n"; + Vector4DPrint(dest); +} else "$$GREEN$$PASS: Vector4DDivS $$FG$$\n"; +//----------------------------------------------------------------------------- +// Vector4DMin + +Vector4DInit(1.25, -2.5, 3.75, -5.0, a); +Vector4DInit(-1.0, 2.0, -3.0, 4.0, b); +Vector4DInit(-1.0,-2.5,-3.0,-5.0, trueRes); +Vector4DMin(a, b, dest); +if(!Vector4DIsEqual(dest, trueRes)) +{ + ST_WARN_ST "Vector4DMin NOT WORKING CORRECTLY! 
RESULT VECTOR:\n"; + Vector4DPrint(dest); +} else "$$GREEN$$PASS: Vector4DMin $$FG$$\n"; +//----------------------------------------------------------------------------- +// Vector4DMax + +Vector4DInit(1.25, -2.5, 3.75, -5.0, a); +Vector4DInit(-1.0, 2.0, -3.0, 4.0, b); +Vector4DInit(1.25, 2.0, 3.75, 4.0, trueRes); +Vector4DMax(a, b, dest); +if (!Vector4DIsEqual(dest, trueRes)) +{ + ST_WARN_ST "Vector4DMax NOT WORKING CORRECTLY! RESULT VECTOR:\n"; + Vector4DPrint(dest); +} else "$$GREEN$$PASS: Vector4DMax $$FG$$\n"; +//----------------------------------------------------------------------------- +// Vector4DNegate + +Vector4DInit(1.25, -2.5, 3.75, -5.0, a); +Vector4DInit(-1.25, 2.5, -3.75, 5.0, trueRes); +Vector4DNegate(a, dest); +if (!Vector4DIsEqual(dest, trueRes)) +{ + ST_WARN_ST "Vector4DNegate NOT WORKING CORRECTLY! RESULT VECTOR:\n"; + Vector4DPrint(dest); +} else "$$GREEN$$PASS: Vector4DNegate $$FG$$\n"; +//----------------------------------------------------------------------------- +// Vector3DNormalize + +Vector4DInit(1.25, -2.5, 3.75, -5.0, a); +Vector3DInit(0.26726124, -0.5345225, 0.80178374, trueRes); +Vector3DNormalize(a, dest); +if (!Vector3DIsEqual(dest, trueRes)) +{ + ST_WARN_ST "Vector3DNormalize NOT WORKING CORRECTLY! RESULT VECTOR:\n"; + Vector3DPrint(dest); +} else "$$GREEN$$PASS: Vector3DNormalize $$FG$$\n"; + +//----------------------------------------------------------------------------- +// Vector3DDot / Vector4DDot + +Vector4DInit(1.25, -2.5, 3.75, -5.0, a); +Vector4DInit(-1.0, 2.0, -3.0, 4.0, b); + +s = Vector3DDot(a, b); +if (F32ToF64(s) != -17.5) +{ + ST_WARN_ST "Vector3DDot NOT WORKING CORRECLTY! RESULT: %n\n", F32ToF64(s); +} else "$$GREEN$$PASS: Vector3DDot $$FG$$\n"; + +s = Vector4DDot(a, b); +if (F32ToF64(s) != -37.5) +{ + ST_WARN_ST "Vector4DDot NOT WORKING CORRECTLY! 
RESULT: %n\n", F32ToF64(s); +} else "$$GREEN$$PASS: Vector4DDot $$FG$$\n"; +//----------------------------------------------------------------------------- +// Vector3DCross + +Vector4DInit(1.0, 2.0, 3.0, -5.0, a); +Vector4DInit(1.5, -4.5, 2.5, 4.0, b); +Vector3DInit(18.5, 2.0, -7.5, trueRes); + +Vector3DCross(a, b, dest); +if (!Vector3DIsEqual(dest, trueRes)) +{ + ST_WARN_ST "Vector3DCross NOT WORKING CORRECTLY! RESULT VECTOR:\n"; + Vector3DPrint(dest); +} else "$$GREEN$$PASS: Vector3DCross $$FG$$\n"; +//----------------------------------------------------------------------------- + +Free(a); +Free(b); +Free(dest); +Free(trueRes); + diff --git a/src/System/Math/Types.ZC b/src/System/Math/Types.ZC new file mode 100755 index 00000000..f229e1c2 --- /dev/null +++ b/src/System/Math/Types.ZC @@ -0,0 +1,127 @@ +/** + @defgroup Math Matrix Math + @brief SSE accelerated matrix math. +*/ + +/** + @ingroup Math + @brief Integer 2D bounding box. +*/ +class CBoundingBox2D +{ + I32 x0, y0, x1, y1; +}; + +/** + @ingroup Math + @brief Single-precision float. +*/ +I32 class F32 {}; + +/** + @ingroup Math + @brief Single-precision floating point 2D vector. + + Must be kept on 16-byte alignment in memory. +*/ +class CVector2D +{ + I32 x, y, z, w; +}; + +/** + @ingroup Math + @brief Single-precision floating point 3D vector. + + Must be kept on 16-byte alignment in memory. +*/ +class CVector3D +{ + I32 x, y, z, w; +}; + +/** + @ingroup Math + @brief Single-precision floating point 4D vector. + + Must be kept on 16-byte alignment in memory. +*/ +class CVector4D +{ + I32 x, y, z, w; +}; + +/** + @ingroup Math + @brief Single-precision floating point 4x4 matrix. + + Stored in column-major. Must be kept on 16 byte alignment in memory. 
+*/
+union CMatrix4D
+{
+	I32 e[16];	// element (row r, column c) is e[c * 4 + r]; see the MAT4_rc indexes
+	CVector4D vec[4];	// same 64 bytes viewed as four vectors, one per column
+};
+
+// Commonly used constants (IEEE-754 single-precision bit patterns)
+
+#define F32_NEGATE_MASK		0x80000000	// Xor (^) with F32 to negate
+#define F32_ZERO		0x00000000	// 0.0
+#define F32_ONE			0x3F800000	// 1.0
+#define F32_PI			0x40490FDB	// Pi
+#define F32_PI_OVER_180		0x3C8EFA35	// Pi / 180
+#define F32_180_OVER_PI		0x42652EE1	// 180 / Pi
+#define F32_PI_OVER_2		0x3FC90FDB	// Pi / 2
+#define F32_2_OVER_PI		0x3F22F983	// 2 / Pi
+
+// Indexes into CMatrix4D element array (CMatrix4D.e[16]): MAT4_rc = c * 4 + r.
+#define MAT4_00 0
+#define MAT4_01 4
+#define MAT4_02 8
+#define MAT4_03 12
+#define MAT4_10 1
+#define MAT4_11 5
+#define MAT4_12 9
+#define MAT4_13 13
+#define MAT4_20 2
+#define MAT4_21 6
+#define MAT4_22 10
+#define MAT4_23 14
+#define MAT4_30 3
+#define MAT4_31 7
+#define MAT4_32 11
+#define MAT4_33 15
+
+#define SSE_SIZE 4	// NOTE(review): presumably 32-bit lanes per SSE register - confirm
+#define AVX_SIZE 8	// NOTE(review): presumably 32-bit lanes per AVX register - confirm
+
+#define NDARRAY_I32 0	// element type tags, see CArray.type
+#define NDARRAY_I64 1
+#define NDARRAY_F32 2
+#define NDARRAY_F64 3
+
+/**
+	@ingroup Math
+	@brief N-Dimensional array.
+
+	Array stored in column-major on 16 byte alignment in memory. 
+*/
+class CArray
+{
+	U16 dims;	// Dimensions
+	U16 type;	// See NDARRAY_ macros
+	U64 *dimLens;	// Dimension lengths
+	U64 size;	// Total elements
+	I32 *I32Buf;	// NOTE(review): presumably only the buffer matching `type` is used - confirm
+	I64 *I64Buf;
+	F32 *F32Buf;
+	F64 *F64Buf;
+};
+
+CVector4D *gVec4Temp0 = MAllocAligned(sizeof(CVector4D), 16);	// global temporaries, allocated on the 16-byte alignment the vector types ask for
+CVector4D *gVec4Temp1 = MAllocAligned(sizeof(CVector4D), 16);
+CVector4D *gVec4Temp2 = MAllocAligned(sizeof(CVector4D), 16);
+CVector4D *gVec4Temp3 = MAllocAligned(sizeof(CVector4D), 16);
+
+CMatrix4D *gMat4Temp1 = MAllocAligned(sizeof(CMatrix4D), 16);	// matrix temporaries are numbered from 1, the vector ones from 0
+CMatrix4D *gMat4Temp2 = MAllocAligned(sizeof(CMatrix4D), 16);
diff --git a/src/System/Math/Vec3.ZC b/src/System/Math/Vec3.ZC
new file mode 100755
index 00000000..7b61dda0
--- /dev/null
+++ b/src/System/Math/Vec3.ZC
@@ -0,0 +1,328 @@
+
+asm
+{
+_VECTOR_3D_INIT::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG4[RBP] // = *dest
+
+	CVTSD2SS	XMM0, SF_ARG1[RBP]	// x narrowed from F64 to single precision
+	MOVSS	[RAX], XMM0
+
+	CVTSD2SS	XMM0, SF_ARG2[RBP]	// y
+	MOVSS	4[RAX], XMM0
+
+	CVTSD2SS	XMM0, SF_ARG3[RBP]	// z
+	MOVSS	8[RAX], XMM0	// offset 12 (w) is not written
+
+	POP	RBP
+	RET1	32	// four 8-byte arguments
+}
+/**
+	@ingroup Math
+	@brief Initialize x, y, z of a vector from double-precision floats (w is left untouched).
+
+	@param[in]     x    X component.
+	@param[in]     y    Y component.
+	@param[in]     z    Z component.
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_3D_INIT U0 Vector3DInit(F64 x, F64 y, F64 z, CVector3D *dest);
+
+/**
+	@ingroup Math
+	@brief Print x, y, z of a vector, tab separated, each widened to F64 for "%n".
+
+	@param[in]     v    Vector
+*/
+U0 Vector3DPrint(CVector3D *v)
+{
+	U8 reg R15 str = "%n\t%n\t%n\n\n";	// NOTE(review): declared U8, not U8 *; the asm pushes R15 as the format pointer - confirm
+asm
+{
+	MOV	RAX, SF_ARG1[RBP] // = *v
+	SUB	RSP, 24	// room for three F64 varargs
+
+	CVTSS2SD	XMM0, 8[RAX]	// z
+	MOVSD_SSE	16[RSP], XMM0
+
+	CVTSS2SD	XMM0, 4[RAX]	// y
+	MOVSD_SSE	8[RSP], XMM0
+
+	CVTSS2SD	XMM0, [RAX]	// x
+	MOVSD_SSE	[RSP], XMM0
+
+	PUSH	3 // # of varargs
+	PUSH	R15	// format string
+	CALL	&Print
+	ADD	RSP, 40	// 24 bytes of varargs plus the two pushes
+}
+}
+
+/**
+	@ingroup Math
+	@brief Copy all members of a vector to destination. 
+
+	@param[in]     src  Source
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_COPY U0 Vector3DCopy(CVector3D *src, CVector3D *dest);	// bound to the 4D routine: all four lanes are copied
+
+asm
+{
+_VECTOR_3D_IS_EQUAL::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP] // = *a
+	MOVAPS	XMM0, [RAX]
+	MOV	RAX, SF_ARG2[RBP] // = *b
+	MOVAPS	XMM1, [RAX]
+	CMPPS	XMM0, XMM1, 0 // CMPEQPS: an equal lane becomes all ones
+
+	PMOVMSKB	RAX, XMM0	// one mask bit per byte of the compare result
+	AND	RAX, 0xFFF	// keep bytes 0-11 only, i.e. the x, y, z lanes
+	CMP	RAX, 0xFFF
+	JNZ	_is_not_equal
+	MOV	RAX, 1	// all three lanes matched
+	JMP	_return
+_is_not_equal:
+	MOV	RAX, 0
+
+_return:
+	POP	RBP
+	RET1	16
+}
+/**
+	@ingroup Math
+	@brief Checks if two vectors are equal: returns 1 when x, y and z match, else 0 (w is ignored).
+
+	@param[in]     a    Vector 1
+	@param[in]     b    Vector 2
+*/
+_extern _VECTOR_3D_IS_EQUAL Bool Vector3DIsEqual(CVector3D *a, CVector3D *b);
+
+/**
+	@ingroup Math
+	@brief Sum of two vectors.
+
+	@param[in]     a    Vector 1
+	@param[in]     b    Vector 2
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_ADD U0 Vector3DAdd(CVector3D *a, CVector3D *b, CVector3D *dest);	// bound to the 4D routine: w is summed too
+
+/**
+	@ingroup Math
+	@brief Add a scalar to a vector.
+
+	@param[in]     v    Vector
+	@param[in]     s    Scalar. NOTE(review): typed as a pointer, but _VECTOR_4D_ADDS loads the 32-bit value from the argument itself.
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_ADDS U0 Vector3DAddS(CVector3D *v, I32 *s, CVector3D *dest);	// bound to the 4D routine
+
+/**
+	@ingroup Math
+	@brief Difference of two vectors.
+
+	@param[in]     a    Vector 1
+	@param[in]     b    Vector 2
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_SUB U0 Vector3DSub(CVector3D *a, CVector3D *b, CVector3D *dest);	// bound to the 4D routine
+
+/**
+	@ingroup Math
+	@brief Subtract a scalar from a vector.
+
+	@param[in]     v    Vector
+	@param[in]     s    Scalar. NOTE(review): typed as a pointer, but _VECTOR_4D_SUBS loads the 32-bit value from the argument itself.
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_SUBS U0 Vector3DSubS(CVector3D *v, I32 *s, CVector3D *dest);	// bound to the 4D routine
+
+/**
+	@ingroup Math
+	@brief Product of two vectors (element multiplication).
+
+	@param[in]     a    Vector 1
+	@param[in]     b    Vector 2
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_MUL U0 Vector3DMul(CVector3D *a, CVector3D *b, CVector3D *dest);	// bound to the 4D routine
+
+/**
+	@ingroup Math
+	@brief Scale a vector by a scalar. 
+
+	@param[in]     v    Vector
+	@param[in]     s    Scalar. NOTE(review): typed as a pointer, but _VECTOR_4D_MULS loads the 32-bit value from the argument itself.
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_MULS U0 Vector3DMulS(CVector3D *v, I32 *s, CVector3D *dest);	// bound to the 4D routine: w is scaled too
+
+/**
+	@ingroup Math
+	@brief Quotient of two vectors (element-wise).
+
+	@param[in]     a    Vector 1
+	@param[in]     b    Vector 2
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_DIV U0 Vector3DDiv(CVector3D *a, CVector3D *b, CVector3D *dest);	// bound to the 4D routine: the w lanes are divided too
+
+/**
+	@ingroup Math
+	@brief Divide a vector by a scalar.
+
+	@param[in]     v    Vector
+	@param[in]     s    Scalar. NOTE(review): typed as a pointer, but _VECTOR_4D_DIVS loads the 32-bit value from the argument itself.
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_DIVS U0 Vector3DDivS(CVector3D *v, I32 *s, CVector3D *dest);	// bound to the 4D routine
+
+/**
+	@ingroup Math
+	@brief Min of two vectors (element-wise).
+
+	@param[in]     a    Vector 1
+	@param[in]     b    Vector 2
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_MIN U0 Vector3DMin(CVector3D *a, CVector3D *b, CVector3D *dest);	// bound to the 4D routine
+
+/**
+	@ingroup Math
+	@brief Max of two vectors (element-wise).
+
+	@param[in]     a    Vector 1
+	@param[in]     b    Vector 2
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_MAX U0 Vector3DMax(CVector3D *a, CVector3D *b, CVector3D *dest);	// bound to the 4D routine
+
+/**
+	@ingroup Math
+	@brief Negate a vector (elements = -elements).
+
+	@param[in]     v    Vector
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_NEGATE U0 Vector3DNegate(CVector3D *v, CVector3D *dest);	// bound to the 4D routine: w is negated too
+
+asm
+{
+_VECTOR_3D_NORMALIZE::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP]	// = *v
+	MOVAPS	XMM0, [RAX]
+	MOVAPS	XMM2, XMM0	// keep an unsquared copy of v
+	MULPS	XMM0, XMM2	// (x*x, y*y, z*z, w*w)
+
+	MOVHLPS	XMM1, XMM0	// XMM1 lane 0 = z*z
+	ADDSS	XMM1, XMM0	// lane 0 = z*z + x*x
+	SHUFPS	XMM0, XMM0, 0x55 // (1, 1, 1, 1): broadcast y*y
+	ADDSS	XMM0, XMM1	// lane 0 = x*x + y*y + z*z
+	SQRTSS	XMM0, XMM0	// length of (x, y, z); w*w is not part of the sum
+	SHUFPS	XMM0, XMM0, 0x00 // (0, 0, 0, 0): broadcast the length
+	DIVPS	XMM2, XMM0	// every lane, w included, is divided; no guard for zero length
+
+	MOV	RAX, SF_ARG2[RBP]	// = *dest
+	MOVAPS	[RAX], XMM2
+
+	POP	RBP
+	RET1	16
+}
+/**
+	@ingroup Math
+	@brief Normalize a vector (length = 1.0). 
+
+	@param[in]     v    Vector
+	@param[in,out] dest Destination (w is divided by the x, y, z length as well)
+*/
+_extern _VECTOR_3D_NORMALIZE U0 Vector3DNormalize(CVector3D *v, CVector3D *dest);
+
+asm
+{
+_VECTOR_3D_DOT::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP]	// = *a
+	MOVAPS	XMM0, [RAX]
+	MOV	RAX, SF_ARG2[RBP]	// = *b
+	MOVAPS	XMM1, [RAX]
+	MULPS	XMM0, XMM1	// (Ax*Bx, Ay*By, Az*Bz, Aw*Bw)
+
+	MOVHLPS	XMM1, XMM0	// XMM1 lane 0 = Az*Bz
+	ADDSS	XMM1, XMM0	// lane 0 = Az*Bz + Ax*Bx
+	SHUFPS	XMM0, XMM0, 0x55 // (1, 1, 1, 1): broadcast Ay*By
+	ADDSS	XMM0, XMM1	// lane 0 = full 3D dot product; Aw*Bw is never added
+	MOVQ	RAX, XMM0	// result bits are in the low 32 bits of RAX
+
+	POP	RBP
+	RET1	16
+}
+/**
+	@ingroup Math
+	@brief Dot product of two vectors (x, y, z only).
+
+	@param[in]     a    Vector 1
+	@param[in]     b    Vector 2
+	@return             Dot product as single-precision bits in an I32 (widen with F32ToF64 to inspect).
+*/
+_extern _VECTOR_3D_DOT I32 Vector3DDot(CVector3D *a, CVector3D *b);
+
+asm
+{
+_VECTOR_3D_CROSS::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP]	// = *a
+	MOVAPS	XMM0, [RAX]
+	MOVAPS	XMM1, XMM0	// second copy of a
+	MOV	RAX, SF_ARG2[RBP]	// = *b
+	MOVAPS	XMM2, [RAX]
+	MOVAPS	XMM3, XMM2	// second copy of b
+
+	SHUFPS	XMM0, XMM0, 0xC9 // (1, 2, 0, 3) [Ay Az Ax]
+	SHUFPS	XMM1, XMM1, 0xD2 // (2, 0, 1, 3) [Az Ax Ay]
+	SHUFPS	XMM2, XMM2, 0xD2 // (2, 0, 1, 3) [Bz Bx By]
+	SHUFPS	XMM3, XMM3, 0xC9 // (1, 2, 0, 3) [By Bz Bx]
+
+	MULPS	XMM0, XMM2	// [Ay*Bz Az*Bx Ax*By]
+	MULPS	XMM1, XMM3	// [Az*By Ax*Bz Ay*Bx]
+	SUBPS	XMM0, XMM1	// cross product in lanes 0-2; lane 3 is Aw*Bw - Aw*Bw
+
+	MOV	RAX, SF_ARG3[RBP]	// = *dest
+	MOVAPS	[RAX], XMM0
+
+	POP	RBP
+	RET1	24
+}
+/**
+	@ingroup Math
+	@brief Cross product of two vectors. 
+
+	@param[in]     a    Vector 1
+	@param[in]     b    Vector 2
+	@param[in,out] dest Destination (x, y, z = a cross b; the w lane receives Aw*Bw - Aw*Bw)
+*/
+_extern _VECTOR_3D_CROSS U0 Vector3DCross(CVector3D *a, CVector3D *b, CVector3D *dest);
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/System/Math/Vec4.ZC b/src/System/Math/Vec4.ZC
new file mode 100755
index 00000000..c9a16f61
--- /dev/null
+++ b/src/System/Math/Vec4.ZC
@@ -0,0 +1,458 @@
+asm
+{
+_VECTOR_4D_INIT::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG5[RBP] // = *dest
+
+	CVTSD2SS	XMM0, SF_ARG1[RBP]	// x narrowed from F64 to single precision
+	MOVSS	[RAX], XMM0
+
+	CVTSD2SS	XMM0, SF_ARG2[RBP]	// y
+	MOVSS	4[RAX], XMM0
+
+	CVTSD2SS	XMM0, SF_ARG3[RBP]	// z
+	MOVSS	8[RAX], XMM0
+
+	CVTSD2SS	XMM0, SF_ARG4[RBP]	// w
+	MOVSS	12[RAX], XMM0
+
+	POP	RBP
+	RET1	40	// five 8-byte arguments
+}
+/**
+	@ingroup Math
+	@brief Initialize members of a vector with double-precision floats.
+
+	@param[in]     x    X component.
+	@param[in]     y    Y component.
+	@param[in]     z    Z component.
+	@param[in]     w    W component.
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_INIT U0 Vector4DInit(F64 x, F64 y, F64 z, F64 w, CVector4D *dest);
+
+#define MATH_VECTOR_4D_STR "%n\t%n\t%n\t%n\n\n"
+
+/**
+	@ingroup Math
+	@brief Print x, y, z, w of a vector, tab separated, each widened to F64 for "%n".
+
+	@param[in]     v    Vector
+*/
+U0 Vector4DPrint(CVector4D *v)
+{
+	U8 reg R15 str = "%n\t%n\t%n\t%n\n\n";	// NOTE(review): repeats MATH_VECTOR_4D_STR; declared U8, not U8 *, yet pushed as the format pointer - confirm
+asm
+{
+	MOV	RAX, SF_ARG1[RBP] // = *v
+	SUB	RSP, 32	// room for four F64 varargs
+
+	CVTSS2SD	XMM0, 12[RAX]	// w
+	MOVSD_SSE	24[RSP], XMM0
+
+	CVTSS2SD	XMM0, 8[RAX]	// z
+	MOVSD_SSE	16[RSP], XMM0
+
+	CVTSS2SD	XMM0, 4[RAX]	// y
+	MOVSD_SSE	8[RSP], XMM0
+
+	CVTSS2SD	XMM0, [RAX]	// x
+	MOVSD_SSE	[RSP], XMM0
+
+	PUSH	4 // # of varargs
+	PUSH	R15	// format string
+	CALL	&Print
+	ADD	RSP, 48	// 32 bytes of varargs plus the two pushes
+}
+}
+
+asm
+{
+_VECTOR_4D_COPY::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP] // = *src
+	MOVAPS	XMM0, [RAX]	// aligned 16-byte load of all four lanes
+	MOV	RAX, SF_ARG2[RBP] // = *dest
+	MOVAPS	[RAX], XMM0
+
+	POP	RBP
+	RET1	16
+}
+/**
+	@ingroup Math
+	@brief Copy all members of a vector to destination. 
+
+	@param[in]     src  Source
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_COPY U0 Vector4DCopy(CVector4D *src, CVector4D *dest);
+
+asm
+{
+_VECTOR_4D_IS_EQUAL::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP] // = *a
+	MOVAPS	XMM0, [RAX]
+	MOV	RAX, SF_ARG2[RBP] // = *b
+	MOVAPS	XMM1, [RAX]
+	CMPPS	XMM0, XMM1, 0 // CMPEQPS: an equal lane becomes all ones
+
+	PMOVMSKB	RAX, XMM0	// one mask bit per byte of the compare result
+
+	AND	RAX, 0xFFFF	// all 16 bytes, i.e. x, y, z and w
+	CMP	RAX, 0xFFFF
+	JNZ	_is_not_equal
+	MOV	RAX, 1	// all four lanes matched
+	JMP	_return
+_is_not_equal:
+	MOV	RAX, 0
+
+_return:
+	POP	RBP
+	RET1	16
+}
+/**
+	@ingroup Math
+	@brief Checks if two vectors are equal (all four components).
+
+	@param[in]     a    Vector 1
+	@param[in]     b    Vector 2
+	@return             TRUE if equal.
+*/
+_extern _VECTOR_4D_IS_EQUAL Bool Vector4DIsEqual(CVector4D *a, CVector4D *b);
+
+asm
+{
+_VECTOR_4D_ADD::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP]	// = *a
+	MOVAPS	XMM0, [RAX]
+	MOV	RAX, SF_ARG2[RBP]	// = *b
+	MOVAPS	XMM1, [RAX]
+	ADDPS	XMM0, XMM1	// a + b in all four lanes
+	MOV	RAX, SF_ARG3[RBP]	// = *dest
+	MOVAPS	[RAX], XMM0
+
+	POP	RBP
+	RET1	24
+}
+/**
+	@ingroup Math
+	@brief Sum of two vectors.
+
+	@param[in]     a    Vector 1
+	@param[in]     b    Vector 2
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_ADD U0 Vector4DAdd(CVector4D *a, CVector4D *b, CVector4D *dest);
+
+asm
+{
+_VECTOR_4D_ADDS::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP]	// = *v
+	MOVAPS	XMM0, [RAX]
+	MOVSS	XMM1, SF_ARG2[RBP]	// scalar is loaded from the argument itself, not through a pointer
+	SHUFPS	XMM1, XMM1, 0	// broadcast it to all four lanes
+	ADDPS	XMM0, XMM1
+	MOV	RAX, SF_ARG3[RBP]	// = *dest
+	MOVAPS	[RAX], XMM0
+
+	POP	RBP
+	RET1	24
+}
+/**
+	@ingroup Math
+	@brief Add a scalar to a vector.
+
+	@param[in]     v    Vector
+	@param[in]     s    Scalar. NOTE(review): typed as a pointer, but the asm loads the 32-bit value from the argument itself.
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_ADDS U0 Vector4DAddS(CVector4D *v, F32 *s, CVector4D *dest);
+
+asm
+{
+_VECTOR_4D_SUB::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP]	// = *a
+	MOVAPS	XMM0, [RAX]
+	MOV	RAX, SF_ARG2[RBP]	// = *b
+	MOVAPS	XMM1, [RAX]
+	SUBPS	XMM0, XMM1	// a - b in all four lanes
+	MOV	RAX, SF_ARG3[RBP]	// = *dest
+	MOVAPS	[RAX], XMM0
+
+	POP	RBP
+	RET1	24
+}
+/**
+	@ingroup Math
+	@brief Difference of two vectors. 
+
+	@param[in]     a    Vector 1
+	@param[in]     b    Vector 2
+	@param[in,out] dest Destination (a - b)
+*/
+_extern _VECTOR_4D_SUB U0 Vector4DSub(CVector4D *a, CVector4D *b, CVector4D *dest);
+
+asm
+{
+_VECTOR_4D_SUBS::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP]	// = *v
+	MOVAPS	XMM0, [RAX]
+	MOVSS	XMM1, SF_ARG2[RBP]	// scalar is loaded from the argument itself, not through a pointer
+	SHUFPS	XMM1, XMM1, 0	// broadcast it to all four lanes
+	SUBPS	XMM0, XMM1	// v - s
+	MOV	RAX, SF_ARG3[RBP]	// = *dest
+	MOVAPS	[RAX], XMM0
+
+	POP	RBP
+	RET1	24
+}
+/**
+	@ingroup Math
+	@brief Subtract a scalar from a vector.
+
+	@param[in]     v    Vector
+	@param[in]     s    Scalar. NOTE(review): typed as a pointer, but the asm loads the 32-bit value from the argument itself.
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_SUBS U0 Vector4DSubS(CVector4D *v, F32 *s, CVector4D *dest);
+
+asm
+{
+_VECTOR_4D_MUL::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP]	// = *a
+	MOVAPS	XMM0, [RAX]
+	MOV	RAX, SF_ARG2[RBP]	// = *b
+	MOVAPS	XMM1, [RAX]
+	MULPS	XMM0, XMM1	// a * b, lane by lane
+	MOV	RAX, SF_ARG3[RBP]	// = *dest
+	MOVAPS	[RAX], XMM0
+
+	POP	RBP
+	RET1	24
+}
+/**
+	@ingroup Math
+	@brief Product of two vectors (element multiplication).
+
+	@param[in]     a    Vector 1
+	@param[in]     b    Vector 2
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_MUL U0 Vector4DMul(CVector4D *a, CVector4D *b, CVector4D *dest);
+
+asm
+{
+_VECTOR_4D_MULS::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP]	// = *v
+	MOVAPS	XMM0, [RAX]
+	MOVSS	XMM1, SF_ARG2[RBP]	// scalar is loaded from the argument itself, not through a pointer
+	SHUFPS	XMM1, XMM1, 0	// broadcast it to all four lanes
+	MULPS	XMM0, XMM1	// v * s
+	MOV	RAX, SF_ARG3[RBP]	// = *dest
+	MOVAPS	[RAX], XMM0
+
+	POP	RBP
+	RET1	24
+}
+/**
+	@ingroup Math
+	@brief Scale a vector by a scalar.
+
+	@param[in]     v    Vector
+	@param[in]     s    Scalar. NOTE(review): typed as a pointer, but the asm loads the 32-bit value from the argument itself.
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_MULS U0 Vector4DMulS(CVector4D *v, F32 *s, CVector4D *dest);
+
+
+asm
+{
+_VECTOR_4D_DIV::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP]	// = *a
+	MOVAPS	XMM0, [RAX]
+	MOV	RAX, SF_ARG2[RBP]	// = *b
+	MOVAPS	XMM1, [RAX]
+	DIVPS	XMM0, XMM1	// a / b, lane by lane; no check for zero divisors
+	MOV	RAX, SF_ARG3[RBP]	// = *dest
+	MOVAPS	[RAX], XMM0
+
+	POP	RBP
+	RET1	24
+}
+/**
+	@ingroup Math
+	@brief Quotient of two vectors. 
+
+	@param[in]     a    Vector 1 (dividend)
+	@param[in]     b    Vector 2 (divisor)
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_DIV U0 Vector4DDiv(CVector4D *a, CVector4D *b, CVector4D *dest);
+
+asm
+{
+_VECTOR_4D_DIVS::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP]	// = *v
+	MOVAPS	XMM0, [RAX]
+	MOVSS	XMM1, SF_ARG2[RBP]	// scalar is loaded from the argument itself, not through a pointer
+	SHUFPS	XMM1, XMM1, 0	// broadcast it to all four lanes
+	DIVPS	XMM0, XMM1	// v / s
+	MOV	RAX, SF_ARG3[RBP]	// = *dest
+	MOVAPS	[RAX], XMM0
+
+	POP	RBP
+	RET1	24
+}
+/**
+	@ingroup Math
+	@brief Divide a vector by a scalar.
+
+	@param[in]     v    Vector
+	@param[in]     s    Scalar. NOTE(review): typed as a pointer, but the asm loads the 32-bit value from the argument itself.
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_DIVS U0 Vector4DDivS(CVector4D *v, F32 *s, CVector4D *dest);
+
+asm
+{
+_VECTOR_4D_MIN::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP]	// = *a
+	MOVAPS	XMM0, [RAX]
+	MOV	RAX, SF_ARG2[RBP]	// = *b
+	MOVAPS	XMM1, [RAX]
+	MINPS	XMM0, XMM1	// smaller value of each lane pair
+	MOV	RAX, SF_ARG3[RBP]	// = *dest
+	MOVAPS	[RAX], XMM0
+
+	POP	RBP
+	RET1	24
+}
+/**
+	@ingroup Math
+	@brief Min of two vectors (element-wise).
+
+	@param[in]     a    Vector 1
+	@param[in]     b    Vector 2
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_MIN U0 Vector4DMin(CVector4D *a, CVector4D *b, CVector4D *dest);
+
+asm
+{
+_VECTOR_4D_MAX::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP]	// = *a
+	MOVAPS	XMM0, [RAX]
+	MOV	RAX, SF_ARG2[RBP]	// = *b
+	MOVAPS	XMM1, [RAX]
+	MAXPS	XMM0, XMM1	// larger value of each lane pair
+	MOV	RAX, SF_ARG3[RBP]	// = *dest
+	MOVAPS	[RAX], XMM0
+
+	POP	RBP
+	RET1	24
+
+}
+/**
+	@ingroup Math
+	@brief Max of two vectors (element-wise).
+
+	@param[in]     a    Vector 1
+	@param[in]     b    Vector 2
+	@param[in,out] dest Destination
+*/
+_extern _VECTOR_4D_MAX U0 Vector4DMax(CVector4D *a, CVector4D *b, CVector4D *dest);
+
+asm
+{
+_VECTOR_4D_NEGATE::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP]	// = *v
+	MOVAPS	XMM1,[RAX]
+	XORPS	XMM0, XMM0	// XMM0 = (0, 0, 0, 0)
+	SUBPS	XMM0, XMM1	// 0 - v in every lane
+	MOV	RAX, SF_ARG2[RBP]	// = *dest
+	MOVAPS	[RAX], XMM0
+
+	POP	RBP
+	RET1	16
+}
+/**
+	@ingroup Math
+	@brief Negate a vector (elements = -elements). 
+
+	@param[in]     v    Vector
+	@param[in,out] dest Destination (computed as 0 - v in each lane)
+*/
+_extern _VECTOR_4D_NEGATE U0 Vector4DNegate(CVector4D *v, CVector4D *dest);
+
+asm
+{
+_VECTOR_4D_DOT::
+	PUSH	RBP
+	MOV	RBP, RSP
+
+	MOV	RAX, SF_ARG1[RBP]	// = *a
+	MOVAPS	XMM0, [RAX]
+	MOV	RAX, SF_ARG2[RBP]	// = *b
+	MOVAPS	XMM1, [RAX]
+	MULPS	XMM0, XMM1	// (Ax*Bx, Ay*By, Az*Bz, Aw*Bw)
+
+	MOVHLPS	XMM1, XMM0	// XMM1 lanes 0-1 = (Az*Bz, Aw*Bw)
+	ADDPS	XMM0, XMM1	// lane 0 = x + z terms, lane 1 = y + w terms
+	MOVSS	XMM1, XMM0	// save lane 0 before the shuffle overwrites it
+	SHUFPS	XMM0, XMM0, 0x55 // (1, 1, 1, 1): broadcast lane 1
+	ADDSS	XMM0, XMM1	// lane 0 = sum of all four products
+	MOVQ	RAX, XMM0	// result bits are in the low 32 bits of RAX
+
+	POP	RBP
+	RET1	16
+}
+/**
+	@ingroup Math
+	@brief Dot product of two vectors (all four components).
+
+	@param[in]     a    Vector 1
+	@param[in]     b    Vector 2
+	@return             Dot product as single-precision bits in an I32 (widen with F32ToF64 to inspect).
+*/
+_extern _VECTOR_4D_DOT I32 Vector4DDot(CVector4D *a, CVector4D *b);
+