asm {
// x87 math helpers.  Conventions visible throughout this block:
//   * Every routine builds a PUSH RBP / MOV RBP,RSP frame and reads its
//     arguments as SF_ARGn[RBP]; RET1 n returns and discards n bytes of
//     arguments (8 bytes per argument).
//   * F64 results are handed back as a raw 64-bit pattern in RAX.  The
//     SF_ARG1 slot doubles as scratch for the FSTP -> MOV RAX transfer.
//   * A "vector" is three consecutive F64s at offsets 0 (x), 8 (y), 16 (z).
//   * RAX, RBX and RDX are used as scratch and are not preserved.
//   * The x87 stack is left at the depth it had on entry.

//************************************
// _D3_NORM
//   ARG1 = ptr to vector
//   RAX  = sqrt(x*x + y*y + z*z)
_D3_NORM::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RDX, U64 SF_ARG1[RBP]
	FLD	U64 [RDX]
	FMUL	ST0, ST0		//x*x
	FLD	U64 8[RDX]
	FMUL	ST0, ST0		//y*y
	FADDP	ST1, ST0
	FLD	U64 16[RDX]
	FMUL	ST0, ST0		//z*z
	FADDP	ST1, ST0
	FSQRT
	FSTP	U64 SF_ARG1[RBP]	//result -> scratch slot
	MOV	RAX, U64 SF_ARG1[RBP]
	POP	RBP
	RET1	8

//************************************
// _D3_NORM_SQR
//   ARG1 = ptr to vector
//   RAX  = x*x + y*y + z*z  (no square root)
_D3_NORM_SQR::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RDX, U64 SF_ARG1[RBP]
	FLD	U64 [RDX]
	FMUL	ST0, ST0
	FLD	U64 8[RDX]
	FMUL	ST0, ST0
	FADDP	ST1, ST0
	FLD	U64 16[RDX]
	FMUL	ST0, ST0
	FADDP	ST1, ST0
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX, U64 SF_ARG1[RBP]
	POP	RBP
	RET1	8

//************************************
// _D3_DIST
//   ARG1 = ptr to vector a,  ARG2 = ptr to vector b
//   RAX  = sqrt of the sum of squared component differences (a - b)
_D3_DIST::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RDX, U64 SF_ARG1[RBP]
	MOV	RBX, U64 SF_ARG2[RBP]
	FLD	U64 [RDX]
	FSUB	ST0, U64 [RBX]
	FMUL	ST0, ST0		//(a.x-b.x)^2
	FLD	U64 8[RDX]
	FSUB	ST0, U64 8[RBX]
	FMUL	ST0, ST0		//(a.y-b.y)^2
	FADDP	ST1, ST0
	FLD	U64 16[RDX]
	FSUB	ST0, U64 16[RBX]
	FMUL	ST0, ST0		//(a.z-b.z)^2
	FADDP	ST1, ST0
	FSQRT
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX, U64 SF_ARG1[RBP]
	POP	RBP
	RET1	16

//************************************
// _D3_DIST_SQR
//   ARG1 = ptr to vector a,  ARG2 = ptr to vector b
//   RAX  = sum of squared component differences (no square root)
_D3_DIST_SQR::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RDX, U64 SF_ARG1[RBP]
	MOV	RBX, U64 SF_ARG2[RBP]
	FLD	U64 [RDX]
	FSUB	ST0, U64 [RBX]
	FMUL	ST0, ST0
	FLD	U64 8[RDX]
	FSUB	ST0, U64 8[RBX]
	FMUL	ST0, ST0
	FADDP	ST1, ST0
	FLD	U64 16[RDX]
	FSUB	ST0, U64 16[RBX]
	FMUL	ST0, ST0
	FADDP	ST1, ST0
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX, U64 SF_ARG1[RBP]
	POP	RBP
	RET1	16

//************************************
// _D3_CROSS
//   ARG1 = ptr to destination vector
//   ARG2 = ptr to vector a,  ARG3 = ptr to vector b
//   dest = a x b;  RAX still holds the ARG1 pointer on return.
//
//   All three components are computed on the x87 stack before any of
//   them is stored, so the result is correct even when the destination
//   is the same vector as a or b.  Peak x87 stack use is four registers.
_D3_CROSS::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RAX, U64 SF_ARG1[RBP]
	MOV	RDX, U64 SF_ARG2[RBP]
	MOV	RBX, U64 SF_ARG3[RBP]

	FLD	U64 8[RDX]
	FMUL	ST0, U64 16[RBX]	//a.y*b.z
	FLD	U64 16[RDX]
	FMUL	ST0, U64 8[RBX]		//a.z*b.y
	FSUBP	ST1, ST0		//ST0 = a.y*b.z - a.z*b.y  (x)

	FLD	U64 [RDX]
	FMUL	ST0, U64 16[RBX]	//a.x*b.z
	FLD	U64 16[RDX]
	FMUL	ST0, U64 [RBX]		//a.z*b.x
	FSUBRP	ST1, ST0		//ST0 = a.z*b.x - a.x*b.z  (y)

	FLD	U64 [RDX]
	FMUL	ST0, U64 8[RBX]		//a.x*b.y
	FLD	U64 8[RDX]
	FMUL	ST0, U64 [RBX]		//a.y*b.x
	FSUBP	ST1, ST0		//ST0 = a.x*b.y - a.y*b.x  (z)

	FSTP	U64 16[RAX]		//stack is z,y,x from the top down
	FSTP	U64 8[RAX]
	FSTP	U64 [RAX]
	POP	RBP
	RET1	24

//************************************
// _D3_DOT
//   ARG1 = ptr to vector a,  ARG2 = ptr to vector b
//   RAX  = a.x*b.x + a.y*b.y + a.z*b.z
_D3_DOT::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RDX, U64 SF_ARG1[RBP]
	MOV	RBX, U64 SF_ARG2[RBP]
	FLD	U64 [RDX]
	FMUL	ST0, U64 [RBX]
	FLD	U64 8[RDX]
	FMUL	ST0, U64 8[RBX]
	FADDP	ST1, ST0
	FLD	U64 16[RDX]
	FMUL	ST0, U64 16[RBX]
	FADDP	ST1, ST0
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX, U64 SF_ARG1[RBP]
	POP	RBP
	RET1	16

//************************************
// _D3_ADD
//   ARG1 = ptr to destination, ARG2 = ptr to a, ARG3 = ptr to b
//   dest = a + b (component-wise);  RAX = ARG1 pointer on return.
_D3_ADD::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RAX, U64 SF_ARG1[RBP]
	MOV	RDX, U64 SF_ARG2[RBP]
	MOV	RBX, U64 SF_ARG3[RBP]
	FLD	U64 [RDX]
	FADD	ST0, U64 [RBX]
	FSTP	U64 [RAX]
	FLD	U64 8[RDX]
	FADD	ST0, U64 8[RBX]
	FSTP	U64 8[RAX]
	FLD	U64 16[RDX]
	FADD	ST0, U64 16[RBX]
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	24

//************************************
// _D3_SUB
//   ARG1 = ptr to destination, ARG2 = ptr to a, ARG3 = ptr to b
//   dest = a - b (component-wise);  RAX = ARG1 pointer on return.
_D3_SUB::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RAX, U64 SF_ARG1[RBP]
	MOV	RDX, U64 SF_ARG2[RBP]
	MOV	RBX, U64 SF_ARG3[RBP]
	FLD	U64 [RDX]
	FSUB	ST0, U64 [RBX]
	FSTP	U64 [RAX]
	FLD	U64 8[RDX]
	FSUB	ST0, U64 8[RBX]
	FSTP	U64 8[RAX]
	FLD	U64 16[RDX]
	FSUB	ST0, U64 16[RBX]
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	24

//************************************
// _D3_MUL
//   ARG1 = ptr to destination, ARG2 = F64 scalar s, ARG3 = ptr to vector v
//   dest = s * v;  RAX = ARG1 pointer on return.
_D3_MUL::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RAX, U64 SF_ARG1[RBP]
	MOV	RBX, U64 SF_ARG3[RBP]
	FLD	U64 SF_ARG2[RBP]	//s stays on the stack
	FLD	ST0			//working copy of s
	FMUL	ST0, U64 [RBX]
	FSTP	U64 [RAX]
	FLD	ST0
	FMUL	ST0, U64 8[RBX]
	FSTP	U64 8[RAX]
	FMUL	ST0, U64 16[RBX]	//last use consumes s itself
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	24

//************************************
// _D3_DIV
//   ARG1 = ptr to destination, ARG2 = ptr to vector v, ARG3 = F64 scalar s
//   dest = v / s;  RAX = ARG1 pointer on return.
_D3_DIV::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RAX, U64 SF_ARG1[RBP]
	MOV	RBX, U64 SF_ARG2[RBP]
	FLD	U64 SF_ARG3[RBP]	//s stays on the stack
	FLD	ST0
	FDIVR	ST0, U64 [RBX]		//reverse divide: ST0 = mem / ST0
	FSTP	U64 [RAX]
	FLD	ST0
	FDIVR	ST0, U64 8[RBX]
	FSTP	U64 8[RAX]
	FDIVR	ST0, U64 16[RBX]	//last use consumes s itself
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	24

//************************************
// _D3_ZERO
//   ARG1 = ptr to vector
//   Writes 64-bit zero to all three components.  RAX = ARG1 pointer.
_D3_ZERO::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RAX, U64 SF_ARG1[RBP]
	XOR	RDX, RDX
	MOV	U64 [RAX], RDX
	MOV	U64 8[RAX], RDX
	MOV	U64 16[RAX], RDX
	POP	RBP
	RET1	8

//************************************
// _D3_COPY
//   ARG1 = ptr to destination, ARG2 = ptr to source
//   Copies three 64-bit values with integer moves.  RAX = ARG1 pointer.
_D3_COPY::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RAX, U64 SF_ARG1[RBP]
	MOV	RBX, U64 SF_ARG2[RBP]
	MOV	RDX, U64 [RBX]
	MOV	U64 [RAX], RDX
	MOV	RDX, U64 8[RBX]
	MOV	U64 8[RAX], RDX
	MOV	RDX, U64 16[RBX]
	MOV	U64 16[RAX], RDX
	POP	RBP
	RET1	16

//************************************
// _D3_EQU
//   ARG1 = ptr to destination, ARG2/ARG3/ARG4 = 64-bit x, y, z values
//   Stores the three values into the vector.  RAX = ARG1 pointer.
_D3_EQU::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RAX, U64 SF_ARG1[RBP]
	MOV	RDX, U64 SF_ARG2[RBP]
	MOV	U64 [RAX], RDX
	MOV	RDX, U64 SF_ARG3[RBP]
	MOV	U64 8[RAX], RDX
	MOV	RDX, U64 SF_ARG4[RBP]
	MOV	U64 16[RAX], RDX
	POP	RBP
	RET1	32

//************************************
// _D3_ADD_EQU
//   ARG1 = ptr to vector d (updated in place), ARG2 = ptr to vector b
//   d += b;  RAX = ARG1 pointer on return.
_D3_ADD_EQU::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RAX, U64 SF_ARG1[RBP]
	MOV	RBX, U64 SF_ARG2[RBP]
	FLD	U64 [RAX]
	FADD	ST0, U64 [RBX]
	FSTP	U64 [RAX]
	FLD	U64 8[RAX]
	FADD	ST0, U64 8[RBX]
	FSTP	U64 8[RAX]
	FLD	U64 16[RAX]
	FADD	ST0, U64 16[RBX]
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	16

//************************************
// _D3_SUB_EQU
//   ARG1 = ptr to vector d (updated in place), ARG2 = ptr to vector b
//   d -= b;  RAX = ARG1 pointer on return.
_D3_SUB_EQU::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RAX, U64 SF_ARG1[RBP]
	MOV	RBX, U64 SF_ARG2[RBP]
	FLD	U64 [RAX]
	FSUB	ST0, U64 [RBX]
	FSTP	U64 [RAX]
	FLD	U64 8[RAX]
	FSUB	ST0, U64 8[RBX]
	FSTP	U64 8[RAX]
	FLD	U64 16[RAX]
	FSUB	ST0, U64 16[RBX]
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	16

//************************************
// _D3_MUL_EQU
//   ARG1 = ptr to vector d (updated in place), ARG2 = F64 scalar s
//   d *= s;  RAX = ARG1 pointer on return.
_D3_MUL_EQU::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RAX, U64 SF_ARG1[RBP]
	FLD	U64 SF_ARG2[RBP]	//s stays on the stack
	FLD	ST0
	FMUL	ST0, U64 [RAX]
	FSTP	U64 [RAX]
	FLD	ST0
	FMUL	ST0, U64 8[RAX]
	FSTP	U64 8[RAX]
	FMUL	ST0, U64 16[RAX]	//last use consumes s itself
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	16

//************************************
// _D3_DIV_EQU
//   ARG1 = ptr to vector d (updated in place), ARG2 = F64 scalar s
//   d /= s;  RAX = ARG1 pointer on return.
_D3_DIV_EQU::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RAX, U64 SF_ARG1[RBP]
	FLD	U64 SF_ARG2[RBP]	//s stays on the stack
	FLD	ST0
	FDIVR	ST0, U64 [RAX]		//reverse divide: ST0 = mem / ST0
	FSTP	U64 [RAX]
	FLD	ST0
	FDIVR	ST0, U64 8[RAX]
	FSTP	U64 8[RAX]
	FDIVR	ST0, U64 16[RAX]	//last use consumes s itself
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	16

//************************************
// _D3_UNIT
//   ARG1 = ptr to vector d (updated in place)
//   Divides every component by the vector's length.  When the length's
//   bit pattern is all zeros (+0.0) the vector is left untouched.
//   RAX = ARG1 pointer on return.
_D3_UNIT::
	PUSH	RBP
	MOV	RBP, RSP
	MOV	RAX, U64 SF_ARG1[RBP]
	FLD	U64 [RAX]
	FMUL	ST0, ST0
	FLD	U64 8[RAX]
	FMUL	ST0, ST0
	FADDP	ST1, ST0
	FLD	U64 16[RAX]
	FMUL	ST0, ST0
	FADDP	ST1, ST0
	FSQRT
	FST	U64 SF_ARG1[RBP]	//copy length out, keep it in ST0
	MOV	RBX, U64 SF_ARG1[RBP]
	TEST	RBX, RBX		//integer test of the F64 bit pattern
	JZ	@@05
	FLD	ST0
	FDIVR	ST0, U64 [RAX]		//ST0 = component / length
	FSTP	U64 [RAX]
	FLD	ST0
	FDIVR	ST0, U64 8[RAX]
	FSTP	U64 8[RAX]
	FDIVR	ST0, U64 16[RAX]	//last use consumes the length
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	8
@@05:	FFREE	ST0			//zero length: discard ST0 ...
	FINCSTP				//... and pop it without a store
	POP	RBP
	RET1	8

//************************************
// _SQR
//   ARG1 = F64 value
//   RAX  = ARG1 * ARG1
_SQR::
	PUSH	RBP
	MOV	RBP, RSP
	FLD	U64 SF_ARG1[RBP]
	FMUL	ST0, ST0
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX, U64 SF_ARG1[RBP]
	POP	RBP
	RET1	8

//************************************
// _SIGN
//   ARG1 = F64 value
//   RAX  = 0 (all bits clear) when FTST reports C3 set,
//          -1.0 when C0 is set (value below zero),
//          1.0 otherwise.
//   The x87 status word is fetched with FSTSW and then tested from RDX.
_SIGN::
	PUSH	RBP
	MOV	RBP, RSP
	FLD	U64 SF_ARG1[RBP]
	FTST				//compare ST0 against 0.0
	FSTSW
	FFREE	ST0			//drop the operand ...
	FINCSTP				//... without storing it
	MOV	RDX, RAX		//RDX = status word
	XOR	RAX, RAX
	BT	RDX, 14			//C3
	JC	@@05
	MOV	RAX, 1.0
	BT	RDX, 8			//C0
	JNC	@@05
	MOV	RAX, -1.0
@@05:	POP	RBP
	RET1	8

//************************************
// _ARG
//   ARG1, ARG2 = F64 values
//   RAX = FPATAN result with ST1 = ARG2 and ST0 = ARG1,
//         i.e. the arctangent of ARG2 / ARG1.
//
//   FPATAN is issued exactly once.  It pops one operand and leaves the
//   C2 status flag undefined, so C2 must not be polled to decide whether
//   to repeat it: a repeat would run on a stack that no longer holds
//   both operands.
_ARG::
	PUSH	RBP
	MOV	RBP, RSP
	FLD	U64 SF_ARG2[RBP]	//ends up in ST1
	FLD	U64 SF_ARG1[RBP]	//ST0
	FPATAN
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX, U64 SF_ARG1[RBP]
	POP	RBP
	RET1	16

//************************************
// _ROUND
//   ARG1 = F64 value
//   RAX  = ARG1 rounded to an integral value by FRNDINT with the
//          rounding-control field (control word bits 0x0C00) cleared.
//   Locals: -8[RBP] = modified control word, -6[RBP] = caller's control
//   word, which is reloaded before returning.
_ROUND::
	PUSH	RBP
	MOV	RBP, RSP
	SUB	RSP, 8
	FSTCW	U16 -8[RBP]
	MOV	AX, U16 -8[RBP]
	MOV	U16 -6[RBP], AX		//save caller's control word
	AND	AX, ~0x0C00		//RC = 00
	MOV	U16 -8[RBP], AX
	FLDCW	U16 -8[RBP]
	FLD	U64 SF_ARG1[RBP]
	FRNDINT
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX, U64 SF_ARG1[RBP]
	FLDCW	U16 -6[RBP]		//restore caller's control word
	LEAVE
	RET1	8

//************************************
// _TRUNC
//   ARG1 = F64 value
//   RAX  = ARG1 rounded by FRNDINT with both rounding-control bits set
//          (0x0C00).  The caller's control word is restored on exit.
_TRUNC::
	PUSH	RBP
	MOV	RBP, RSP
	SUB	RSP, 8
	FSTCW	U16 -8[RBP]
	MOV	AX, U16 -8[RBP]
	MOV	U16 -6[RBP], AX		//save caller's control word
	OR	AX, 0x0C00		//RC = 11
	MOV	U16 -8[RBP], AX
	FLDCW	U16 -8[RBP]
	FLD	U64 SF_ARG1[RBP]
	FRNDINT
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX, U64 SF_ARG1[RBP]
	FLDCW	U16 -6[RBP]		//restore caller's control word
	LEAVE
	RET1	8

//************************************
// _FLOOR
//   ARG1 = F64 value
//   RAX  = ARG1 rounded by FRNDINT with the rounding-control field set
//          to 0x0400.  The caller's control word is restored on exit.
_FLOOR::
	PUSH	RBP
	MOV	RBP, RSP
	SUB	RSP, 8
	FSTCW	U16 -8[RBP]
	MOV	AX, U16 -8[RBP]
	MOV	U16 -6[RBP], AX		//save caller's control word
	AND	AX, ~0x0C00
	OR	AX, 0x0400		//RC = 01
	MOV	U16 -8[RBP], AX
	FLDCW	U16 -8[RBP]
	FLD	U64 SF_ARG1[RBP]
	FRNDINT
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX, U64 SF_ARG1[RBP]
	FLDCW	U16 -6[RBP]		//restore caller's control word
	LEAVE
	RET1	8

//************************************
// _CEIL
//   ARG1 = F64 value
//   RAX  = ARG1 rounded by FRNDINT with the rounding-control field set
//          to 0x0800.  The caller's control word is restored on exit.
_CEIL::
	PUSH	RBP
	MOV	RBP, RSP
	SUB	RSP, 8
	FSTCW	U16 -8[RBP]
	MOV	AX, U16 -8[RBP]
	MOV	U16 -6[RBP], AX		//save caller's control word
	AND	AX, ~0x0C00
	OR	AX, 0x0800		//RC = 10
	MOV	U16 -8[RBP], AX
	FLDCW	U16 -8[RBP]
	FLD	U64 SF_ARG1[RBP]
	FRNDINT
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX, U64 SF_ARG1[RBP]
	FLDCW	U16 -6[RBP]		//restore caller's control word
	LEAVE
	RET1	8

//************************************
SYS_POW::
// RAX=RDX`RAX
	PUSH	RBP
	MOV	RBP, RSP
	SUB	RSP, 24
	TEST	RAX, RAX	//To power of zero?
	JNZ	@@05
	MOV	RAX, 1.0
	JMP	@@20
@@05:	TEST	RDX, RDX	//zero to a power?
JNZ @@10 XOR RAX, RAX JMP @@20 @@10: MOV RCX, RAX //RCX=pow MOV U64 -8[RBP], RDX MOV U64 -16[RBP], RAX FSTCW U16 -24[RBP] MOV AX, U16 -24[RBP] MOV U16 -22[RBP], AX OR AX, 0x0C00 MOV U16 -24[RBP], AX FLDCW U16 -24[RBP] FLD U64 -16[RBP] FLD U64 -8[RBP] FABS //base should be pos FYL2X // y*log2(x) FST U64 -8[RBP] FRNDINT FLD U64 -8[RBP] FSUBRP ST1, ST0 F2XM1 FLD1 FADDP ST1, ST0 FSTP U64 -16[RBP] FLD U64 -8[RBP] FLD U64 -16[RBP] FSCALE // ST0<