/*
The CosmiC assembler currently has partial SSE support. SSE instructions with
no prefix are fully supported, but instructions prefixed with 0x66, 0xF2, or
0xF3 can only be assembled using the lower registers, XMM0-XMM7 and RAX-RDI.

SSE instructions supporting XMM0-XMM15 RAX-R15:
_________________________________________________
MOVAPS MOVUPS MOVLPS MOVHPS MOVLHPS MOVHLPS MOVNTI MOVNTPS MOVMSKPS RCPPS
ANDPS ANDNPS ADDPS SUBPS MULPS DIVPS MINPS MAXPS ORPS XORPS SQRTPS RSQRTPS
CMPPS SHUFPS CVTPS2PD CVTDQ2PS CVTPI2PS COMISS UCOMISS UNPCKLPS UNPCKHPS
_________________________________________________

SSE instructions not in the above list most likely can only be assembled using
XMM0-XMM7 RAX-RDI.
*/

// DemoAllSSE: one long asm block listing SSE-family instructions so that the
// output of Uf("DemoAllSSE") can be compared against this source listing.
// Instruction order, operands and immediates are the test data; do not
// reorder them. Returns the RDX-bound local 'res'; the function is never
// called in this file, only unassembled.
I64 DemoAllSSE()
{// Not meant to be run, just to
// test Assembler and Unassembler against.
// Will likely cause General Protection crash if run.
// Some SSE ops require 16-byte aligned vals or else crash.
  I64 reg RDX res = 0;  // 'res' is bound to RDX, which the asm below also names

  asm {
    // Fences and MXCSR load/store
    LFENCE
    MFENCE
    SFENCE
    LDMXCSR     [RDX]
    STMXCSR     [RDX]

    // Moves
    LDDQU       XMM0, [RDX]
    MOVAPS      XMM0, XMM15
    MOVAPD      XMM0, XMM1
    MOVUPS      XMM0, XMM15
    MOVUPD      XMM0, XMM1
    MOVSS       XMM0, XMM1
    MOVSD_SSE   XMM0, XMM1
    MOVD        XMM0, ESI
    MOVQ        XMM0, RDX
    MOVLPS      XMM15, [RDX]
    MOVLPD      XMM0, [RDX]
    MOVHPS      XMM0, [RDX]
    MOVHPD      XMM0, [RDX]
    MOVDQA      XMM0, XMM1
    MOVDQU      XMM0, XMM1
    MOVDDUP     XMM0, XMM1
    MOVSLDUP    XMM0, XMM1
    MOVSHDUP    XMM0, XMM1
    MOVLHPS     XMM0, XMM15
    MOVHLPS     XMM0, XMM15
    MOVNTI      [RDX], R13
    // The next three are disabled in the original; the reason is not recorded here.
//  MOVNTPS     [RDX], XMM0
//  MOVNTPD     [RDX], XMM0
//  MOVNTDQ     [RDX], XMM1
    MOVMSKPS    RDX, XMM15
    MOVMSKPD    RDX, XMM0
    PMOVMSKB    RDX, XMM0

    // Packed sign-extending / zero-extending moves
    // (the BQ forms are disabled in the original; reason not recorded here)
    PMOVSXBW    XMM0, XMM1
    PMOVSXBD    XMM0, XMM1
//  PMOVSXBQ    XMM0, XMM1
    PMOVSXWD    XMM0, XMM1
    PMOVSXWQ    XMM0, XMM1
    PMOVSXDQ    XMM0, XMM1
    PMOVZXBW    XMM0, XMM1
    PMOVZXBD    XMM0, XMM1
//  PMOVZXBQ    XMM0, XMM1
    PMOVZXWD    XMM0, XMM1
    PMOVZXWQ    XMM0, XMM1
    PMOVZXDQ    XMM0, XMM1

    // Horizontal add/sub and add-sub
    HADDPS      XMM0, XMM1
    HADDPD      XMM0, XMM1
    HSUBPS      XMM0, XMM1
    HSUBPD      XMM0, XMM1
    ADDSUBPS    XMM0, XMM1
    ADDSUBPD    XMM0, XMM1

    // Floating-point arithmetic and logic. The unprefixed PS forms use XMM15
    // as the source; the other forms use XMM1 (see the header comment).
    RCPSS       XMM0, XMM1
    RCPPS       XMM0, XMM15
    ANDPS       XMM0, XMM15
    ANDPD       XMM0, XMM1
    ANDNPS      XMM0, XMM15
    ANDNPD      XMM0, XMM1
    ADDSS       XMM0, XMM1
    ADDSD       XMM0, XMM1
    ADDPS       XMM0, XMM15
    ADDPD       XMM0, XMM1
    SUBSS       XMM0, XMM1
    SUBSD       XMM0, XMM1
    SUBPS       XMM0, XMM15
    SUBPD       XMM0, XMM1
    MULSS       XMM0, XMM1
    MULSD       XMM0, XMM1
    MULPS       XMM0, XMM15
    MULPD       XMM0, XMM1
    // NOTE(review): DIVSS uses XMM0 for both operands, unlike its
    // neighbours -- confirm this is intentional.
    DIVSS       XMM0, XMM0
    DIVSD       XMM0, XMM1
    DIVPS       XMM0, XMM15
    DIVPD       XMM0, XMM1
    MINSS       XMM0, XMM1
    MINSD       XMM0, XMM1
    MINPS       XMM0, XMM15
    MINPD       XMM0, XMM1
    MAXSS       XMM0, XMM1
    MAXSD       XMM0, XMM1
    MAXPS       XMM0, XMM15
    MAXPD       XMM0, XMM1
    ORPS        XMM0, XMM15
    ORPD        XMM0, XMM1
    XORPS       XMM0, XMM15
    XORPD       XMM0, XMM1
    SQRTSS      XMM0, XMM1
    SQRTSD      XMM0, XMM1
    SQRTPS      XMM0, XMM15
    SQRTPD      XMM0, XMM1
    RSQRTSS     XMM0, XMM1
    RSQRTPS     XMM0, XMM15

    // Conversions
    CVTSD2SS    XMM0, XMM1
    CVTSS2SD    XMM0, XMM1
    CVTSS2SI    RDX, XMM0
    CVTSI2SD    XMM0, RDX
    CVTSI2SS    XMM0, RDX
    CVTSD2SI    RDX, XMM1
    CVTPS2PD    XMM0, XMM15
    CVTDQ2PS    XMM0, XMM15
    CVTPS2DQ    XMM0, XMM1
    CVTPD2DQ    XMM0, XMM1
    CVTDQ2PD    XMM0, XMM1
    // NOTE(review): CVTPD2PS is a 0x66-prefixed op and is not in the header's
    // XMM0-XMM15 list, yet it names XMM9 here -- confirm it assembles as intended.
    CVTPD2PS    XMM0, XMM9
    CVTPI2PS    XMM15, [RDX]
    CVTPI2PD    XMM0, [RDX]
    CVTTSS2SI   RDX, XMM1
    CVTTSD2SI   RDX, XMM0
    CVTTPS2DQ   XMM0, XMM1
    CVTTPD2DQ   XMM0, XMM1

    // Compares and unpacks
    COMISS      XMM0, XMM15
    COMISD      XMM0, XMM1
    UCOMISS     XMM0, XMM15
    UCOMISD     XMM0, XMM1
    UNPCKLPS    XMM0, XMM15
    UNPCKLPD    XMM0, XMM1
    UNPCKHPS    XMM0, XMM15
    UNPCKHPD    XMM0, XMM1
    MASKMOVDQU  XMM0, XMM1

    // Ops taking an immediate byte. The immediates look like arbitrary
    // patterns chosen to be recognisable in the unassembly -- TODO confirm.
    CMPSS       XMM0, XMM1, 0x74
    CMPSD_SSE   XMM0, XMM1, 0x63
    CMPPS       XMM0, XMM15, 0x52
    CMPPD       XMM0, XMM1, 0x41
    SHUFPS      XMM0, XMM15, 0x30
    SHUFPD      XMM0, XMM1, 0x29
    PSHUFB      XMM0, XMM1
    PSHUFD      XMM0, XMM1, 0x18
    PSHUFLW     XMM0, XMM1, 0x07
    PSHUFHW     XMM0, XMM1, 0x96
    ROUNDSS     XMM0, XMM1, 0x85
    ROUNDSD     XMM0, XMM1, 0x74
    ROUNDPS     XMM0, XMM1, 0x63
    ROUNDPD     XMM0, XMM1, 0x52
    BLENDVPS    XMM0, XMM1
    BLENDVPD    XMM0, XMM1
    BLENDPS     XMM0, XMM1, 0x99
    BLENDPD     XMM0, XMM1, 0x99
    PBLENDW     XMM0, XMM1, 0x99
    DPPS        XMM0, XMM1, 0x99
    DPPD        XMM0, XMM1, 0x99
    PALIGNR     XMM0, XMM1, 0x99
    PCLMULQDQ   XMM0, XMM1, 0x99

    // Element extract / insert
    PEXTRB      AH, XMM1, 0x99
    PEXTRW      RDX, XMM1, 0x99
    PEXTRD      ESI, XMM1, 0x99
    PEXTRQ      RDX, XMM1, 0x99
    EXTRACTPS   ESI, XMM1, 0x99
    PINSRB      XMM0, AH, 0x99
    PINSRW      XMM0, AX, 0x99
    PINSRD      XMM0, ESI, 0x99
    PINSRQ      XMM0, RDX, 0x99

    // String compares
    PCMPESTRM   XMM0, XMM1, 0x99
    PCMPESTRI   XMM0, XMM1, 0x99
    PCMPISTRM   XMM0, XMM1, 0x99
    PCMPISTRI   XMM0, XMM1, 0x99

    // Packed integer compares
    PCMPGTB     XMM0, XMM1
    PCMPGTW     XMM0, XMM1
    PCMPGTD     XMM0, XMM1
    PCMPGTQ     XMM0, XMM1
    PCMPEQB     XMM0, XMM1
    PCMPEQW     XMM0, XMM1
    PCMPEQD     XMM0, XMM1
    PCMPEQQ     XMM0, XMM1

    // Packed integer shifts (count taken from an XMM register)
    PSRLW       XMM0, XMM1
    PSRLD       XMM0, XMM1
    PSRLQ       XMM0, XMM1
    PSLLW       XMM0, XMM1
    PSLLD       XMM0, XMM1
    PSLLQ       XMM0, XMM1
    PSRAW       XMM0, XMM1
    PSRAD       XMM0, XMM1

    // Packed integer arithmetic and logic
    PAVGB       XMM0, XMM1
    PAVGW       XMM0, XMM1
    PABSB       XMM0, XMM1
    PABSW       XMM0, XMM1
    PABSD       XMM0, XMM1
    PAND        XMM0, XMM1
    PANDN       XMM0, XMM1
    PHADDW      XMM0, XMM1
    PHADDD      XMM0, XMM1
    PHADDSW     XMM0, XMM1
    PADDUSB     XMM0, XMM1
    PADDUSW     XMM0, XMM1
    PADDSB      XMM0, XMM1
    PADDSW      XMM0, XMM1
    PHSUBW      XMM0, XMM1
    PHSUBD      XMM0, XMM1
    PHSUBSW     XMM0, XMM1
    PSUBUSB     XMM0, XMM1
    PSUBUSW     XMM0, XMM1
    PSUBSB      XMM0, XMM1
    PSUBSW      XMM0, XMM1
    PADDB       XMM0, XMM1
    PADDW       XMM0, XMM1
    PADDD       XMM0, XMM1
    PADDQ       XMM0, XMM1
    PSUBB       XMM0, XMM1
    PSUBW       XMM0, XMM1
    PSUBD       XMM0, XMM1
    PSUBQ       XMM0, XMM1
    PHMINPOSUW  XMM0, XMM1
    PMINUB      XMM0, XMM1
    PMINUW      XMM0, XMM1
    PMINUD      XMM0, XMM1
    PMINSB      XMM0, XMM1
    PMINSW      XMM0, XMM1
    PMINSD      XMM0, XMM1
    PMAXUB      XMM0, XMM1
    PMAXUW      XMM0, XMM1
    PMAXUD      XMM0, XMM1
    PMAXSB      XMM0, XMM1
    PMAXSW      XMM0, XMM1
    PMAXSD      XMM0, XMM1
    PMULLW      XMM0, XMM1
    PMULLD      XMM0, XMM1
    PMULHRSW    XMM0, XMM1
    PMULHUW     XMM0, XMM1
    PMULHW      XMM0, XMM1
    PMULUDQ     XMM0, XMM1
    PMULDQ      XMM0, XMM1
    PMADDWD     XMM0, XMM1
    PMADDUBSW   XMM0, XMM1
    PTEST       XMM0, XMM1
    PSLLDQ      XMM0, 0x11
    PSRLDQ      XMM0, 0x22
    PSIGNB      XMM0, XMM1
    PSIGNW      XMM0, XMM1
    PSIGND      XMM0, XMM1
    PXOR        XMM0, XMM1

    // Pack / unpack
    PACKSSWB    XMM0, XMM1
    PACKUSWB    XMM0, XMM1
    PACKSSDW    XMM0, XMM1
    PACKUSDW    XMM0, XMM1
    PUNPCKLBW   XMM0, XMM1
    PUNPCKLWD   XMM0, XMM1
    PUNPCKLDQ   XMM0, XMM1
    PUNPCKLQDQ  XMM0, XMM1
    PUNPCKHBW   XMM0, XMM1
    PUNPCKHWD   XMM0, XMM1
    PUNPCKHDQ   XMM0, XMM1
    PUNPCKHQDQ  XMM0, XMM1

    PSADBW      XMM0, XMM1
    MPSADBW     XMM0, XMM1, 0x21
    INSERTPS    XMM0, XMM1, 0x32

    // Prefetch hints
    PREFETCHT0  [RDX]
    PREFETCHT1  [RDX]
    PREFETCHT2  [RDX]
    PREFETCHNTA [RDX]
  }

  return res;
}

// Print a heading, then unassemble DemoAllSSE so the listing can be checked by eye.
"\n$BK,1$Unassembling all SSE ops, note errors:\n$BK,0$";
Uf("DemoAllSSE");

// DumpXMM: prints XMM0 through XMM7, one register per output line.
// 'quad' is bound to RAX, so each PEXTRQ into RAX is what the following print
// statement reads. For every register, quadword index 1 is extracted and
// printed first, then quadword index 0, so the two prints together form one
// 32-digit hex value per register.
U0 DumpXMM()
{ // Dump XMM registers
  I64 reg RAX quad;

  asm {PEXTRQ RAX, XMM0, 1}
  "XMM0: 0x%016X", quad;
  asm {PEXTRQ RAX, XMM0, 0}
  "%016X\n", quad;

  asm {PEXTRQ RAX, XMM1, 1}
  "XMM1: 0x%016X", quad;
  asm {PEXTRQ RAX, XMM1, 0}
  "%016X\n", quad;

  asm {PEXTRQ RAX, XMM2, 1}
  "XMM2: 0x%016X", quad;
  asm {PEXTRQ RAX, XMM2, 0}
  "%016X\n", quad;

  asm {PEXTRQ RAX, XMM3, 1}
  "XMM3: 0x%016X", quad;
  asm {PEXTRQ RAX, XMM3, 0}
  "%016X\n", quad;

  asm {PEXTRQ RAX, XMM4, 1}
  "XMM4: 0x%016X", quad;
  asm {PEXTRQ RAX, XMM4, 0}
  "%016X\n", quad;

  asm {PEXTRQ RAX, XMM5, 1}
  "XMM5: 0x%016X", quad;
  asm {PEXTRQ RAX, XMM5, 0}
  "%016X\n", quad;

  asm {PEXTRQ RAX, XMM6, 1}
  "XMM6: 0x%016X", quad;
  asm {PEXTRQ RAX, XMM6, 0}
  "%016X\n", quad;

  asm {PEXTRQ RAX, XMM7, 1}
  "XMM7: 0x%016X", quad;
  asm {PEXTRQ RAX, XMM7, 0}
  "%016X\n", quad;

  "\n\n";
}

// Print a heading, then unassemble DumpXMM.
"\n\nDump XMM Registers function definition:\n";
Uf("DumpXMM");
"\n\n";

// DemoSSE: runnable demo. Loads constants into XMM0-XMM3, XMM6 and XMM7 via
// RDX, dumps the registers, applies a few SSE ops, and dumps again.
// XMM4 and XMM5 are not written here, so their dumped values are whatever
// they held on entry.
// NOTE(review): 'res' is bound to RDX, which the first asm block overwrites
// after 'res' is initialised, so the returned value does not look meaningful;
// the call at the bottom of the file discards it.
I64 DemoSSE()
{
  I64 reg RDX res = 0;

  // Stage each 64-bit constant in RDX, then move it into an XMM register.
  asm {
    MOV     RDX, 0x3939393939393939
    MOVQ    XMM0, RDX
    MOV     RDX, 0x7777777777777777
    MOVQ    XMM1, RDX
    MOV     RDX, 0x2021202120212021
    MOVQ    XMM2, RDX
    MOV     RDX, 0x0123456789012345
    MOVQ    XMM3, RDX

    MOV     RDX, 0x0000400000005000
    MOVQ    XMM6, RDX
    MOV     RDX, 0x0000000300000002
    MOVQ    XMM7, RDX
  }
  DumpXMM;  // state before the operations below

  asm {
    // Byte-shift XMM0-XMM3 left by 8 bytes.
    PSLLDQ  XMM0, 8
    PSLLDQ  XMM1, 8
    PSLLDQ  XMM2, 8
    PSLLDQ  XMM3, 8

    // Horizontal dword add of XMM6 and XMM7 into XMM6. Per the instruction
    // definition, with the constants above, the dwords from low to high
    // should be 0x9000, 0, 5, 0 -- verify against the second dump.
    PHADDD  XMM6, XMM7
  }
  DumpXMM;  // state after the operations

  return res;
}

// Print a heading, unassemble DemoSSE, then run it.
"\n$BK,1$Unassembling and running SSE demo.\n$BK,0$";
Uf("DemoSSE");
"\n";
DemoSSE;