ZealOS/src/Home/SSE.ZC
2022-03-26 00:59:25 -04:00

384 lines
7.4 KiB
HolyC
Executable file

/*
The ZealC assembler currently has partial SSE support.
SSE instructions with no prefix are fully supported,
but instructions prefixed with 0x66, 0xF2, or 0xF3
can only be assembled using the lower registers,
XMM0-XMM7 and RAX-RDI.
SSE instructions supporting XMM0-XMM15 RAX-R15:
_________________________________________________
MOVAPS MOVUPS MOVLPS MOVHPS MOVLHPS MOVHLPS
MOVNTI MOVNTPS MOVMSKPS
RCPPS ANDPS ANDNPS ADDPS SUBPS MULPS
DIVPS MINPS MAXPS ORPS XORPS SQRTPS
RSQRTPS CMPPS SHUFPS
CVTPS2PD CVTDQ2PS CVTPI2PS
COMISS UCOMISS UNPCKLPS UNPCKHPS
_________________________________________________
SSE instructions not in the above list most likely
can only be assembled using XMM0-XMM7 RAX-RDI.
*/
// DemoAllSSE: fixture listing one sample of each SSE instruction so the
// assembler's output can be compared against the unassembler's listing
// (see the Uf call after the function).
//
// Register choice follows the limitation described in the header comment of
// this file: instructions listed there as supporting XMM0-XMM15 / RAX-R15
// are given an upper register (XMM15, R13); every other instruction stays
// within XMM0-XMM7 and the low general registers.
//
// Returns: res, which is bound to RDX. Several instructions below write RDX,
// so the return value carries no meaning.
I64 DemoAllSSE()
{// Not meant to be run, just to
// test Assembler and Unassembler against.
// Will likely cause General Protection crash if run.
// Some SSE ops require 16-byte aligned vals or else crash.
I64 reg RDX res = 0;
// Fences and MXCSR load/store.
LFENCE
MFENCE
SFENCE
LDMXCSR [RDX]
STMXCSR [RDX]
// Data movement.
LDDQU XMM0, [RDX]
MOVAPS XMM0, XMM15
MOVAPD XMM0, XMM1
MOVUPS XMM0, XMM15
MOVUPD XMM0, XMM1
MOVSS XMM0, XMM1
MOVSD_SSE XMM0, XMM1
MOVD XMM0, ESI
MOVQ XMM0, RDX
MOVLPS XMM15, [RDX]
MOVLPD XMM0, [RDX]
MOVHPS XMM0, [RDX]
MOVHPD XMM0, [RDX]
MOVDQA XMM0, XMM1
MOVDQU XMM0, XMM1
MOVDDUP XMM0, XMM1
MOVSLDUP XMM0, XMM1
MOVSHDUP XMM0, XMM1
MOVLHPS XMM0, XMM15
MOVHLPS XMM0, XMM15
MOVNTI [RDX], R13
// The non-temporal stores below were already disabled; the reason is not
// recorded in this file.
// MOVNTPS [RDX], XMM0
// MOVNTPD [RDX], XMM0
// MOVNTDQ [RDX], XMM1
MOVMSKPS RDX, XMM15
MOVMSKPD RDX, XMM0
PMOVMSKB RDX, XMM0
// Packed sign/zero extension (the BQ forms were already disabled).
PMOVSXBW XMM0, XMM1
PMOVSXBD XMM0, XMM1
// PMOVSXBQ XMM0, XMM1
PMOVSXWD XMM0, XMM1
PMOVSXWQ XMM0, XMM1
PMOVSXDQ XMM0, XMM1
PMOVZXBW XMM0, XMM1
PMOVZXBD XMM0, XMM1
// PMOVZXBQ XMM0, XMM1
PMOVZXWD XMM0, XMM1
PMOVZXWQ XMM0, XMM1
PMOVZXDQ XMM0, XMM1
// Horizontal add/subtract and ADDSUB.
HADDPS XMM0, XMM1
HADDPD XMM0, XMM1
HSUBPS XMM0, XMM1
HSUBPD XMM0, XMM1
ADDSUBPS XMM0, XMM1
ADDSUBPD XMM0, XMM1
// Floating-point arithmetic and logic.
RCPSS XMM0, XMM1
RCPPS XMM0, XMM15
ANDPS XMM0, XMM15
ANDPD XMM0, XMM1
ANDNPS XMM0, XMM15
ANDNPD XMM0, XMM1
ADDSS XMM0, XMM1
ADDSD XMM0, XMM1
ADDPS XMM0, XMM15
ADDPD XMM0, XMM1
SUBSS XMM0, XMM1
SUBSD XMM0, XMM1
SUBPS XMM0, XMM15
SUBPD XMM0, XMM1
MULSS XMM0, XMM1
MULSD XMM0, XMM1
MULPS XMM0, XMM15
MULPD XMM0, XMM1
// Source register differs from the destination, as in the sibling ops, so a
// swapped-operand error would be visible in the unassembly.
DIVSS XMM0, XMM1
DIVSD XMM0, XMM1
DIVPS XMM0, XMM15
DIVPD XMM0, XMM1
MINSS XMM0, XMM1
MINSD XMM0, XMM1
MINPS XMM0, XMM15
MINPD XMM0, XMM1
MAXSS XMM0, XMM1
MAXSD XMM0, XMM1
MAXPS XMM0, XMM15
MAXPD XMM0, XMM1
ORPS XMM0, XMM15
ORPD XMM0, XMM1
XORPS XMM0, XMM15
XORPD XMM0, XMM1
SQRTSS XMM0, XMM1
SQRTSD XMM0, XMM1
SQRTPS XMM0, XMM15
SQRTPD XMM0, XMM1
RSQRTSS XMM0, XMM1
RSQRTPS XMM0, XMM15
// Conversions.
CVTSD2SS XMM0, XMM1
CVTSS2SD XMM0, XMM1
CVTSS2SI RDX, XMM0
CVTSI2SD XMM0, RDX
CVTSI2SS XMM0, RDX
CVTSD2SI RDX, XMM1
CVTPS2PD XMM0, XMM15
CVTDQ2PS XMM0, XMM15
CVTPS2DQ XMM0, XMM1
CVTPD2DQ XMM0, XMM1
CVTDQ2PD XMM0, XMM1
// CVTPD2PS carries the 0x66 prefix and is not in the header's list of
// instructions that assemble with XMM8-XMM15, so it uses a low register.
CVTPD2PS XMM0, XMM1
CVTPI2PS XMM15, [RDX]
CVTPI2PD XMM0, [RDX]
CVTTSS2SI RDX, XMM1
CVTTSD2SI RDX, XMM0
CVTTPS2DQ XMM0, XMM1
CVTTPD2DQ XMM0, XMM1
// Ordered/unordered compares and unpacks.
COMISS XMM0, XMM15
COMISD XMM0, XMM1
UCOMISS XMM0, XMM15
UCOMISD XMM0, XMM1
UNPCKLPS XMM0, XMM15
UNPCKLPD XMM0, XMM1
UNPCKHPS XMM0, XMM15
UNPCKHPD XMM0, XMM1
MASKMOVDQU XMM0, XMM1
// Compare, shuffle, round, blend, extract and insert forms; most take an
// immediate byte, and the immediates vary so each can be told apart in the
// unassembly.
CMPSS XMM0, XMM1, 0x74
CMPSD_SSE XMM0, XMM1, 0x63
CMPPS XMM0, XMM15, 0x52
CMPPD XMM0, XMM1, 0x41
SHUFPS XMM0, XMM15, 0x30
SHUFPD XMM0, XMM1, 0x29
PSHUFB XMM0, XMM1
PSHUFD XMM0, XMM1, 0x18
PSHUFLW XMM0, XMM1, 0x07
PSHUFHW XMM0, XMM1, 0x96
ROUNDSS XMM0, XMM1, 0x85
ROUNDSD XMM0, XMM1, 0x74
ROUNDPS XMM0, XMM1, 0x63
ROUNDPD XMM0, XMM1, 0x52
BLENDVPS XMM0, XMM1
BLENDVPD XMM0, XMM1
BLENDPS XMM0, XMM1, 0x99
BLENDPD XMM0, XMM1, 0x99
PBLENDW XMM0, XMM1, 0x99
DPPS XMM0, XMM1, 0x99
DPPD XMM0, XMM1, 0x99
PALIGNR XMM0, XMM1, 0x99
PCLMULQDQ XMM0, XMM1, 0x99
PEXTRB AH, XMM1, 0x99
PEXTRW RDX, XMM1, 0x99
PEXTRD ESI, XMM1, 0x99
PEXTRQ RDX, XMM1, 0x99
EXTRACTPS ESI, XMM1, 0x99
PINSRB XMM0, AH, 0x99
PINSRW XMM0, AX, 0x99
PINSRD XMM0, ESI, 0x99
PINSRQ XMM0, RDX, 0x99
PCMPESTRM XMM0, XMM1, 0x99
PCMPESTRI XMM0, XMM1, 0x99
PCMPISTRM XMM0, XMM1, 0x99
PCMPISTRI XMM0, XMM1, 0x99
// Packed integer ops.
PCMPGTB XMM0, XMM1
PCMPGTW XMM0, XMM1
PCMPGTD XMM0, XMM1
PCMPGTQ XMM0, XMM1
PCMPEQB XMM0, XMM1
PCMPEQW XMM0, XMM1
PCMPEQD XMM0, XMM1
PCMPEQQ XMM0, XMM1
PSRLW XMM0, XMM1
PSRLD XMM0, XMM1
PSRLQ XMM0, XMM1
PSLLW XMM0, XMM1
PSLLD XMM0, XMM1
PSLLQ XMM0, XMM1
PSRAW XMM0, XMM1
PSRAD XMM0, XMM1
PAVGB XMM0, XMM1
PAVGW XMM0, XMM1
PABSB XMM0, XMM1
PABSW XMM0, XMM1
PABSD XMM0, XMM1
PAND XMM0, XMM1
PANDN XMM0, XMM1
PHADDW XMM0, XMM1
PHADDD XMM0, XMM1
PHADDSW XMM0, XMM1
PADDUSB XMM0, XMM1
PADDUSW XMM0, XMM1
PADDSB XMM0, XMM1
PADDSW XMM0, XMM1
PHSUBW XMM0, XMM1
PHSUBD XMM0, XMM1
PHSUBSW XMM0, XMM1
PSUBUSB XMM0, XMM1
PSUBUSW XMM0, XMM1
PSUBSB XMM0, XMM1
PSUBSW XMM0, XMM1
PADDB XMM0, XMM1
PADDW XMM0, XMM1
PADDD XMM0, XMM1
PADDQ XMM0, XMM1
PSUBB XMM0, XMM1
PSUBW XMM0, XMM1
PSUBD XMM0, XMM1
PSUBQ XMM0, XMM1
PHMINPOSUW XMM0, XMM1
PMINUB XMM0, XMM1
PMINUW XMM0, XMM1
PMINUD XMM0, XMM1
PMINSB XMM0, XMM1
PMINSW XMM0, XMM1
PMINSD XMM0, XMM1
PMAXUB XMM0, XMM1
PMAXUW XMM0, XMM1
PMAXUD XMM0, XMM1
PMAXSB XMM0, XMM1
PMAXSW XMM0, XMM1
PMAXSD XMM0, XMM1
PMULLW XMM0, XMM1
PMULLD XMM0, XMM1
PMULHRSW XMM0, XMM1
PMULHUW XMM0, XMM1
PMULHW XMM0, XMM1
PMULUDQ XMM0, XMM1
PMULDQ XMM0, XMM1
PMADDWD XMM0, XMM1
PMADDUBSW XMM0, XMM1
PTEST XMM0, XMM1
PSLLDQ XMM0, 0x11
PSRLDQ XMM0, 0x22
PSIGNB XMM0, XMM1
PSIGNW XMM0, XMM1
PSIGND XMM0, XMM1
PXOR XMM0, XMM1
PACKSSWB XMM0, XMM1
PACKUSWB XMM0, XMM1
PACKSSDW XMM0, XMM1
PACKUSDW XMM0, XMM1
PUNPCKLBW XMM0, XMM1
PUNPCKLWD XMM0, XMM1
PUNPCKLDQ XMM0, XMM1
PUNPCKLQDQ XMM0, XMM1
PUNPCKHBW XMM0, XMM1
PUNPCKHWD XMM0, XMM1
PUNPCKHDQ XMM0, XMM1
PUNPCKHQDQ XMM0, XMM1
PSADBW XMM0, XMM1
MPSADBW XMM0, XMM1, 0x21
INSERTPS XMM0, XMM1, 0x32
// Prefetch hints.
PREFETCHT0 [RDX]
PREFETCHT1 [RDX]
PREFETCHT2 [RDX]
PREFETCHNTA [RDX]
return res;
}
// Print a banner, then unassemble DemoAllSSE so the listing can be checked
// by eye against the source above. The function itself is never called.
// NOTE(review): the $$BK,1$$ / $$BK,0$$ markers presumably switch DolDoc
// blinking on and off around the banner -- confirm.
"\n$$BK,1$$Unassembling all SSE ops, note errors:\n$$BK,0$$";
Uf("DemoAllSSE");
// DumpXMM: print XMM0 through XMM7, one register per line, in the form
// "XMMn: 0x" followed by the high quadword and then the low quadword, each
// formatted with %016X. XMM8-XMM15 are not dumped.
//
// Takes no parameters and returns nothing (U0).
//
// NOTE(review): each register is read with two PEXTRQ instructions separated
// by a print statement. This assumes the print does not modify the XMM
// registers, and that RAX still holds the extracted value when the print
// reads its argument -- confirm against the compiler's calling convention.
U0 DumpXMM()
{ // Dump XMM registers
// quad is bound to RAX. It is never assigned from ZealC code; every value
// printed below is whatever the preceding PEXTRQ left in RAX.
I64 reg RAX quad;
// For each register: immediate 1 selects the high quadword, immediate 0 the
// low quadword. The high half is printed first, without a newline, so the two
// halves read as one 128-bit hex value.
PEXTRQ RAX, XMM0, 1
"XMM0: 0x%016X", quad;
PEXTRQ RAX, XMM0, 0
"%016X\n", quad;
PEXTRQ RAX, XMM1, 1
"XMM1: 0x%016X", quad;
PEXTRQ RAX, XMM1, 0
"%016X\n", quad;
PEXTRQ RAX, XMM2, 1
"XMM2: 0x%016X", quad;
PEXTRQ RAX, XMM2, 0
"%016X\n", quad;
PEXTRQ RAX, XMM3, 1
"XMM3: 0x%016X", quad;
PEXTRQ RAX, XMM3, 0
"%016X\n", quad;
PEXTRQ RAX, XMM4, 1
"XMM4: 0x%016X", quad;
PEXTRQ RAX, XMM4, 0
"%016X\n", quad;
PEXTRQ RAX, XMM5, 1
"XMM5: 0x%016X", quad;
PEXTRQ RAX, XMM5, 0
"%016X\n", quad;
PEXTRQ RAX, XMM6, 1
"XMM6: 0x%016X", quad;
PEXTRQ RAX, XMM6, 0
"%016X\n", quad;
PEXTRQ RAX, XMM7, 1
"XMM7: 0x%016X", quad;
PEXTRQ RAX, XMM7, 0
"%016X\n", quad;
// Blank lines to separate this dump from whatever is printed next.
"\n\n";
}
// Print a heading, then unassemble DumpXMM so its generated code is shown.
// DumpXMM is not called here.
"\n\nDump XMM Registers function definition:\n";
Uf("DumpXMM");
"\n\n";
// DemoSSE: small runnable SSE demonstration. Loads 64-bit patterns into
// XMM0-XMM3, XMM6 and XMM7, dumps the registers, applies a byte shift and a
// horizontal add, then dumps them again so the effect can be seen.
//
// Returns: res, which is bound to RDX.
// NOTE(review): the MOV RDX instructions below overwrite RDX after res is
// initialised to 0, and DumpXMM is called in between, so the returned value
// is whatever RDX holds at the return statement -- not necessarily 0.
// Confirm whether any caller depends on it.
I64 DemoSSE()
{
I64 reg RDX res = 0;
// Stage each constant in RDX, then MOVQ it into the low quadword of the
// target XMM register (MOVQ from a general register clears the high quadword).
MOV RDX, 0x3939393939393939
MOVQ XMM0, RDX
MOV RDX, 0x7777777777777777
MOVQ XMM1, RDX
MOV RDX, 0x2021202120212021
MOVQ XMM2, RDX
MOV RDX, 0x0123456789012345
MOVQ XMM3, RDX
// XMM6 low quadword holds the dwords 0x00005000 and 0x00004000.
MOV RDX, 0x0000400000005000
MOVQ XMM6, RDX
// XMM7 low quadword holds the dwords 0x00000002 and 0x00000003.
MOV RDX, 0x0000000300000002
MOVQ XMM7, RDX
// XMM4 and XMM5 are not written by this function; the dumps show whatever
// they already contained.
// First dump: state before the operations (no-argument call, parentheses omitted).
DumpXMM;
// Shift XMM0-XMM3 left by 8 bytes: the low quadword moves into the high
// quadword and the low quadword becomes zero.
PSLLDQ XMM0, 8
PSLLDQ XMM1, 8
PSLLDQ XMM2, 8
PSLLDQ XMM3, 8
// Horizontal add of adjacent dword pairs: the low two result dwords come from
// XMM6, the high two from XMM7. If the values loaded above survive the first
// DumpXMM call, XMM6 should read 0x0000000000000005 0000000000009000.
PHADDD XMM6, XMM7
// Second dump: state after the operations.
DumpXMM;
return res;
}
// Print a banner, unassemble DemoSSE to show its generated code, then run it.
// NOTE(review): the $$BK,1$$ / $$BK,0$$ markers presumably switch DolDoc
// blinking on and off around the banner -- confirm.
"\n$$BK,1$$Unassembling and running SSE demo.\n$$BK,0$$";
Uf("DemoSSE"); "\n";
DemoSSE;