Implement 3 operand support in assembler.

Add support for opcodes up to 8 bytes long.
Fix 0x66 prefixed opcode instruction unassembly, at the cost of losing higher-register access on prefixed instructions.
Add almost every SSE instruction.
This commit is contained in:
TomAwezome 2021-10-18 01:52:41 -04:00
parent 07cfd873b8
commit f000c2fbdb
13 changed files with 818 additions and 71 deletions

View file

@ -511,8 +511,8 @@ U8 asm_seg_prefixes[6] = {0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65};
Bool ParseAsmInst(CCompCtrl *cc, CHashOpcode *tmpo, I64 argcount)
{
CAOTCtrl *aotc = cc->aotc;
I64 i, j, arg1, arg2, om, seg, arg1mask, arg2mask;
CAsmArg *tmpa1, *tmpa2;
I64 i, j, arg1, arg2, arg3, om, seg, arg1mask, arg2mask, arg3mask;
CAsmArg *tmpa1, *tmpa2, *tmpa3;
Bool ModrM_complete, U8_avail = FALSE, found_second_possible = FALSE, isXMM, isXMM1, isXMM2;
CInst *tmpins;
CAsmIns cur, best;
@ -522,17 +522,26 @@ Bool ParseAsmInst(CCompCtrl *cc, CHashOpcode *tmpo, I64 argcount)
arg1mask = AsmMakeArgMask(cc, &aotc->arg1);
else
arg1mask = 1;
if (argcount > 1)
arg2mask = AsmMakeArgMask(cc, &aotc->arg2);
else
arg2mask = 1;
if (argcount > 2)
arg3mask = AsmMakeArgMask(cc, &aotc->arg3);
else
arg3mask = 1;
for (i = 0; i < tmpo->inst_entry_count; i++)
{
tmpins = &tmpo->ins[i];
if (tmpins->arg1 == ARGT_REL8 || tmpins->arg2 == ARGT_REL8)
U8_avail = TRUE;
if (Bt(&arg1mask, tmpins->arg1) && Bt(&arg2mask, tmpins->arg2) &&
(!(tmpins->flags & IEF_NOT_IN_64_BIT) || aotc->seg_size != 64))
(!(tmpins->flags & IEF_NOT_IN_64_BIT) || aotc->seg_size != 64))
{
MemSet(&cur, 0, sizeof(CAsmIns));
cur.tmpins = tmpins;
@ -545,14 +554,16 @@ Bool ParseAsmInst(CCompCtrl *cc, CHashOpcode *tmpo, I64 argcount)
else
cur.REX = 0x40;
}
cur.disp.imm_flag = TRUE;
cur.imm.imm_flag = TRUE;
om = tmpins->opcode_modifier;
arg1 = tmpins->arg1;
arg2 = tmpins->arg2;
tmpa1 = &aotc->arg1;
tmpa2 = &aotc->arg2;
cur.last_opcode_U8 = tmpins->opcode[tmpins->opcode_count - 1];
cur.disp.imm_flag = TRUE;
cur.imm.imm_flag = TRUE;
om = tmpins->opcode_modifier;
arg1 = tmpins->arg1;
arg2 = tmpins->arg2;
arg3 = tmpins->arg3;
tmpa1 = &aotc->arg1;
tmpa2 = &aotc->arg2;
tmpa3 = &aotc->arg3;
cur.last_opcode_U8 = tmpins->opcode[tmpins->opcode_count - 1];
isXMM1 = ARGT_XMM <= arg1 <= ARGT_XMM0;
isXMM2 = ARGT_XMM <= arg2 <= ARGT_XMM0;
@ -584,7 +595,7 @@ Bool ParseAsmInst(CCompCtrl *cc, CHashOpcode *tmpo, I64 argcount)
cur.imm.U8_count = 2;
cur.imm.imm_flag = FALSE;
}
else if (om==OM_CD)
else if (om == OM_CD)
{
cur.imm.U8_count = 4;
cur.imm.imm_flag = FALSE;
@ -730,7 +741,7 @@ Bool ParseAsmInst(CCompCtrl *cc, CHashOpcode *tmpo, I64 argcount)
MemCopy(&cur.imm.num, &tmpa1->num, sizeof(CAsmNum));
}
}
else if (argcount == 2)
else if (argcount >= 2)
{
if (best.U8_count != 255 && !found_second_possible && !best.is_default)
{
@ -807,7 +818,7 @@ Bool ParseAsmInst(CCompCtrl *cc, CHashOpcode *tmpo, I64 argcount)
else
{
cur.ModrM |= (tmpa2->reg1 & 7) << 3;
if (tmpa2->reg1 & 15 >7)
if (tmpa2->reg1 & 15 > 7)
cur.REX |= 4;
if (!isXMM2 && tmpa2->reg1 >= 20) //RBPu8, RSPu8, RSIu8, RDIu8?
cur.has_REX = TRUE;
@ -955,7 +966,8 @@ Bool ParseAsmInst(CCompCtrl *cc, CHashOpcode *tmpo, I64 argcount)
cur.REX |= 8;
}
}
if (ARGT_IMM8 <= arg2 <= ARGT_IMM64 || ARGT_UIMM8 <= arg2 <= ARGT_UIMM64)
if (ARGT_IMM8 <= arg2 <= ARGT_IMM64 ||
ARGT_UIMM8 <= arg2 <= ARGT_UIMM64)
{
MemCopy(&cur.imm.num, &tmpa2->num, sizeof(CAsmNum));
if (arg2 == ARGT_IMM8 || arg2 == ARGT_UIMM8)
@ -974,6 +986,12 @@ Bool ParseAsmInst(CCompCtrl *cc, CHashOpcode *tmpo, I64 argcount)
cur.REX |= 8;
}
}
if (argcount > 2 && (arg3 == ARGT_IMM8 || arg3 == ARGT_UIMM8))
{ // SSE 3 operand RMI encoding
MemCopy(&cur.imm.num, &tmpa3->num, sizeof(CAsmNum));
if (arg3 == ARGT_IMM8 || arg3 == ARGT_UIMM8)
cur.imm.U8_count = 1;
}
}
cur.U8_count = tmpins->opcode_count + cur.disp.U8_count + cur.imm.U8_count;
if (cur.has_ModrM)
@ -1005,7 +1023,8 @@ Bool ParseAsmInst(CCompCtrl *cc, CHashOpcode *tmpo, I64 argcount)
AOTStoreCodeU8(cc, OC_ADDR_SIZE_PREFIX); //Operand size override
if (aotc->seg_size == 64 && best.REX & 0x40 == 0x40 && (best.REX != 0x40 || best.has_REX) &&
(best.REX & 7 || !(tmpins->flags & IEF_REX_ONLY_R8_R15 ||
tmpins->flags & IEF_REX_XOR_LIKE && tmpa1->reg1 == tmpa2->reg1 && best.ModrM & 0xC0 == 0xC0)))
tmpins->flags & IEF_REX_XOR_LIKE && tmpa1->reg1 == tmpa2->reg1 && best.ModrM & 0xC0 == 0xC0)) &&
!(isXMM && (tmpins->opcode[0] == 0x66 || tmpins->opcode[0] == 0xF3 || tmpins->opcode[0] == 0xF2)))//SSE kludge
AOTStoreCodeU8(cc, best.REX);
for (j = 0; j < tmpins->opcode_count - 1; j++)
AOTStoreCodeU8(cc, tmpins->opcode[j]);
@ -1143,7 +1162,7 @@ U0 ParseBinFile(CCompCtrl *cc)
Lex(cc);
}
U0 ParseAsmBlk(CCompCtrl *cc,I64 comp_flags)
U0 ParseAsmBlk(CCompCtrl *cc, I64 comp_flags)
{
CAOTCtrl *aotc = cc->aotc;
I64 i, j, k, argcount, old_flags = cc->flags & CCF_ASM_EXPRESSIONS;
@ -1292,10 +1311,27 @@ U0 ParseAsmBlk(CCompCtrl *cc,I64 comp_flags)
ParseAsmArg(cc, &aotc->arg2, TRUE);
else
ParseAsmArg(cc, &aotc->arg2, FALSE);
if (tmpo->ins[0].arg3)
{
argcount++;
if (cc->token != ',')
LexExcept(cc, "Expecting ',' at ");
else
{
Lex(cc); //skip ','
if (ARGT_REL8 <= tmpo->ins[0].arg3 <= ARGT_REL32)
ParseAsmArg(cc, &aotc->arg3, TRUE);
else
ParseAsmArg(cc, &aotc->arg3, FALSE);
}
}
}
}
}
ParseAsmInst(cc, tmpo,argcount);
ParseAsmInst(cc, tmpo, argcount);
}
else if (cc->hash_entry->type & HTT_EXPORT_SYS_SYM)
{

View file

@ -138,7 +138,7 @@ U0 AsmHashLoad()
if (tmpins->flags & IEF_STI_LIKE && tmpins->slash_val != SV_I_REG)
tmpins->uasm_slash_val = SV_STI_LIKE;
tmpins->arg1 = tmpins->arg2 = tmpins->size1 = tmpins->size2 = 0;
tmpins->arg1 = tmpins->arg2 = tmpins->arg3 = tmpins->size1 = tmpins->size2 = tmpins->size3 = 0;
if (cc->token == TK_IDENT)
{
j = DefineMatch(cc->cur_str, "ST_ARG_TYPES");
@ -170,6 +170,14 @@ U0 AsmHashLoad()
else if (Bt(&cmp.size_arg_mask[16], j))
tmpins->size2 = 128;
if (cc->token == TK_IDENT)
{
j = DefineMatch(cc->cur_str, "ST_ARG_TYPES");
Lex(cc);
tmpins->arg3 = j;
if (Bt(&cmp.size_arg_mask[1],j))
tmpins->size3 = 8;
}
}
}
}

View file

@ -372,6 +372,7 @@ U0 CompLoadDefines()
"XMM5\0"
"XMM6\0"
"XMM7\0"
"XMM8\0"
"XMM9\0"
"XMM10\0"
"XMM11\0"

View file

@ -135,6 +135,7 @@ XMM XMM3 3;
XMM XMM4 4;
XMM XMM5 5;
XMM XMM6 6;
XMM XMM7 7;
XMM XMM8 8;
XMM XMM9 9;
XMM XMM10 10;
@ -255,6 +256,12 @@ OPCODE POPA 0x61, 16;
OPCODE POPAD 0x61, 32;
OPCODE POPF 0x9D, 16;
OPCODE POPFD 0x9D, 32;
OPCODE LFENCE 0x0F 0xAE 0xE8;
OPCODE MFENCE 0x0F 0xAE 0xF0;
OPCODE SFENCE 0x0F 0xAE 0xF8;
OPCODE LDMXCSR 0x0F 0xAE,/2 M32;
OPCODE STMXCSR 0x0F 0xAE,/3 M32;
OPCODE LDDQU 0xF2 0x0F 0xF0,/R XMM M64;
OPCODE MOVAPS
0x0F 0x28,/R XMM XMM128
0x0F 0x29,/R XMM128 XMM;
@ -266,49 +273,296 @@ OPCODE MOVUPS
0x0F 0x11,/R XMM128 XMM;
OPCODE MOVUPD
0x66 0x0F 0x10,/R XMM XMM128
0x66 0x0F 0x10,/R XMM128 XMM;
0x66 0x0F 0x11,/R XMM128 XMM;
OPCODE MOVSS
0xF3 0x0F 0x10,/R XMM XMM128
0xF3 0x0F 0x11,/R XMM128 XMM;
OPCODE MOVSD_SSE
0xF2 0x0F 0x10,/R XMM XMM64
0xF2 0x0F 0x11,/R XMM64 XMM;
OPCODE MOVD
0x66 0x0F 0x6E,/R XMM RM32
0x66 0x0F 0x7E,/R RM32 XMM;
OPCODE MOVQ
0x66 0x48 0x0F 0x6E,/R `XMM RM64
0x66 0x48 0x0F 0x7E,/R `RM64 XMM
0x66 0x48 0x0F 0x6E,/R XMM RM64
0x66 0x48 0x0F 0x7E,/R RM64 XMM
0xF3 0x0F 0x7E,/R XMM XMM64;
OPCODE PMOVMSKB 0x66 0x0F 0xD7,/R `R64 XMM;
OPCODE MOVNTDQ 0x66 0x0F 0xE7,/R XMM128 XMM;
OPCODE MOVLPS
0x0F 0x12,/R XMM M64
0x0F 0x13,/R M64 XMM;
OPCODE MOVLPD
0x66 0x0F 0x12,/R XMM M64
0x66 0x0F 0x13,/R M64 XMM;
OPCODE MOVHPS
0x0F 0x16,/R XMM M64
0x0F 0x17,/R M64 XMM;
OPCODE MOVHPD
0x66 0x0F 0x16,/R XMM M64
0x66 0x0F 0x17,/R M64 XMM;
OPCODE MOVDQA
0x66 0x0F 0x6F,/R XMM XMM128
0x66 0x0F 0x7F,/R XMM128 XMM;
OPCODE MOVDQU
0xF3 0x0F 0x6F,/R XMM XMM128
0xF3 0x0F 0x7F,/R XMM128 XMM;
OPCODE MOVDDUP 0xF2 0x0F 0x12,/R XMM XMM64;
OPCODE MOVSLDUP 0xF3 0x0F 0x12,/R XMM XMM128;
OPCODE MOVSHDUP 0xF3 0x0F 0x16,/R XMM XMM128;
OPCODE MOVLHPS 0x0F 0x16,/R XMM XMM128; // 2nd arg kludge
OPCODE MOVHLPS 0x0F 0x12,/R XMM XMM128; // 2nd arg kludge
OPCODE MOVNTI
0x0F 0xC3,/R M32 R32
0x0F 0xC3,/R M64 R64;//do we need M32 line..?
OPCODE MOVNTPS 0x0F 0x2B,/R M128 XMM;
OPCODE MOVNTPD 0x66 0x0F 0x2B,/R M128 XMM;
OPCODE MOVNTDQ 0x66 0x0F 0xE7,/R M128 XMM;
OPCODE MOVMSKPS 0x0F 0x50,/R R64 XMM;
OPCODE MOVMSKPD 0x66 0x0F 0x50,/R R64 XMM;
OPCODE PMOVMSKB 0x66 0x0F 0xD7,/R R64 XMM;
OPCODE PMOVSXBW 0x66 0x0F 0x38 0x20,/R XMM XMM64;
OPCODE PMOVSXBD 0x66 0x0F 0x38 0x21,/R XMM XMM32;
OPCODE PMOVSXBQ 0x66 0x0F 0x38 0x22,/R XMM XMM16;
OPCODE PMOVSXWD 0x66 0x0F 0x38 0x23,/R XMM XMM64;
OPCODE PMOVSXWQ 0x66 0x0F 0x38 0x24,/R XMM XMM32;
OPCODE PMOVSXDQ 0x66 0x0F 0x38 0x25,/R XMM XMM64;
OPCODE PMOVZXBW 0x66 0x0F 0x38 0x30,/R XMM XMM64;
OPCODE PMOVZXBD 0x66 0x0F 0x38 0x31,/R XMM XMM32;
OPCODE PMOVZXBQ 0x66 0x0F 0x38 0x32,/R XMM XMM16;
OPCODE PMOVZXWD 0x66 0x0F 0x38 0x33,/R XMM XMM64;
OPCODE PMOVZXWQ 0x66 0x0F 0x38 0x34,/R XMM XMM32;
OPCODE PMOVZXDQ 0x66 0x0F 0x38 0x35,/R XMM XMM64;
OPCODE HADDPS 0xF2 0x0F 0x7C,/R XMM XMM128;
OPCODE HADDPD 0x66 0x0F 0x7C,/R XMM XMM128;
OPCODE HSUBPS 0xF2 0x0F 0x7D,/R XMM XMM128;
OPCODE HSUBPD 0x66 0x0F 0x7D,/R XMM XMM128;
OPCODE ADDSUBPS 0xF2 0x0F 0xD0,/R XMM XMM128;
OPCODE ADDSUBPD 0x66 0x0F 0xD0,/R XMM XMM128;
OPCODE RCPSS 0xF3 0x0F 0x53,/R XMM XMM32;
OPCODE RCPPS 0x0F 0x53,/R XMM XMM128;
OPCODE ANDPS 0x0F 0x54,/R XMM XMM128;
OPCODE ANDPD 0x66 0x0F 0x54,/R XMM XMM128;
OPCODE ANDNPS 0x0F 0x55,/R XMM XMM128;
OPCODE ANDNPD 0x66 0x0F 0x55,/R XMM XMM128;
OPCODE ADDSS 0xF3 0x0F 0x58,/R XMM XMM32;
OPCODE SUBSS 0xF3 0x0F 0x5C,/R XMM XMM32;
OPCODE MULSS 0xF3 0x0F 0x59,/R XMM XMM32;
OPCODE DIVSS 0xF3 0x0F 0x5E,/R XMM XMM32;
OPCODE MULSD 0xF2 0x0F 0x59,/R XMM XMM64;
OPCODE DIVSD 0xF2 0x0F 0x5E,/R XMM XMM64;
OPCODE ADDSD 0xF2 0x0F 0x58,/R XMM XMM64;
OPCODE SUBSD 0xF2 0x0F 0x5C,/R XMM XMM64;
OPCODE ADDPS 0x0F 0x58,/R XMM XMM128;
OPCODE SUBPS 0x0F 0x5C,/R XMM XMM128;
OPCODE MULPS 0x0F 0x59,/R XMM XMM128;
OPCODE DIVPS 0x0F 0x5E,/R XMM XMM128;
OPCODE MINPS 0x0F 0x5D,/R XMM XMM128;
OPCODE MAXPS 0x0F 0x5F,/R XMM XMM128;
OPCODE XORPS 0x0F 0x57,/R XMM XMM128;
OPCODE ADDPD 0x66 0x0F 0x58,/R XMM XMM128;
OPCODE SUBSS 0xF3 0x0F 0x5C,/R XMM XMM32;
OPCODE SUBSD 0xF2 0x0F 0x5C,/R XMM XMM64;
OPCODE SUBPS 0x0F 0x5C,/R XMM XMM128;
OPCODE SUBPD 0x66 0x0F 0x5C,/R XMM XMM128;
OPCODE MULSS 0xF3 0x0F 0x59,/R XMM XMM32;
OPCODE MULSD 0xF2 0x0F 0x59,/R XMM XMM64;
OPCODE MULPS 0x0F 0x59,/R XMM XMM128;
OPCODE MULPD 0x66 0x0F 0x59,/R XMM XMM128;
OPCODE DIVSS 0xF3 0x0F 0x5E,/R XMM XMM32;
OPCODE DIVSD 0xF2 0x0F 0x5E,/R XMM XMM64;
OPCODE DIVPS 0x0F 0x5E,/R XMM XMM128;
OPCODE DIVPD 0x66 0x0F 0x5E,/R XMM XMM128;
OPCODE RSQRTSS 0xF3 0x0F 0x52,/R XMM XMM32;
OPCODE MINSS 0xF3 0x0F 0x5D,/R XMM XMM32;
OPCODE MINSD 0xF2 0x0F 0x5D,/R XMM XMM64;
OPCODE MINPS 0x0F 0x5D,/R XMM XMM128;
OPCODE MINPD 0x66 0x0F 0x5D,/R XMM XMM128;
OPCODE MAXSS 0xF3 0x0F 0x5F,/R XMM XMM32;
OPCODE MAXSD 0xF2 0x0F 0x5F,/R XMM XMM64;
OPCODE MAXPS 0x0F 0x5F,/R XMM XMM128;
OPCODE MAXPD 0x66 0x0F 0x5F,/R XMM XMM128;
OPCODE ORPS 0x0F 0x56,/R XMM XMM128;
OPCODE ORPD 0x66 0x0F 0x56,/R XMM XMM128;
OPCODE XORPS 0x0F 0x57,/R XMM XMM128;
OPCODE XORPD 0x66 0x0F 0x57,/R XMM XMM128;
OPCODE SQRTSS 0xF3 0x0F 0x51,/R XMM XMM32;
OPCODE SQRTSD 0xF2 0x0F 0x51,/R XMM XMM64;
OPCODE SQRTPS 0x0F 0x51,/R XMM XMM128;
OPCODE SQRTPD 0x66 0x0F 0x51,/R XMM XMM128;
OPCODE RSQRTSS 0xF3 0x0F 0x52,/R XMM XMM32;
OPCODE RSQRTPS 0x0F 0x52,/R XMM XMM128;
OPCODE CVTSD2SS 0xF2 0x0F 0x5A,/R XMM XMM64;
OPCODE CVTSS2SD 0xF3 0x0F 0x5A,/R XMM XMM32;
OPCODE CVTSI2SD 0xF2 0x48 0x0F 0x2A,/R `XMM RM64;
OPCODE CVTSD2SI 0xF2 0x48 0x0F 0x2D,/R `R64 XMM64;
OPCODE CVTPD2PS 0x0F 0x5A,/R 16 XMM XMM128;
OPCODE CVTPS2PD 0x0F 0x5A,/R 16 XMM XMM128;
OPCODE MOVLHPS 0x0F 0x16,/R XMM XMM128;
OPCODE MOVHLPS 0x0F 0x12,/R XMM XMM128;
OPCODE CVTSS2SI
0xF3 0x0F 0x2D,/R R32 XMM32
0xF3 0x48 0x0F 0x2D,/R R64 XMM;
OPCODE CVTSI2SD
0xF2 0x0F 0x2A,/R XMM RM32
0xF2 0x48 0x0F 0x2A,/R XMM RM64;
OPCODE CVTSI2SS
0xF3 0x0F 0x2A,/R XMM RM32
0xF3 0x48 0x0F 0x2A,/R XMM RM64;
OPCODE CVTSD2SI
0xF2 0x0F 0x2D,/R R32 XMM64
0xF2 0x48 0x0F 0x2D,/R R64 XMM64;
OPCODE CVTPS2PD 0x0F 0x5A,/R XMM XMM64;
OPCODE CVTDQ2PS 0x0F 0x5B,/R XMM XMM128;
OPCODE CVTPS2DQ 0x66 0x0F 0x5B,/R XMM XMM128;
OPCODE CVTPD2DQ 0xF2 0x0F 0xE6,/R XMM XMM128;
OPCODE CVTDQ2PD 0xF3 0x0F 0xE6,/R XMM XMM128;
OPCODE CVTPD2PS 0x66 0x0F 0x5A,/R XMM XMM128;
OPCODE CVTPI2PS 0x0F 0x2A,/R XMM M64;
OPCODE CVTPI2PD 0x66 0x0F 0x2A,/R XMM M64;
OPCODE CVTTSS2SI
0xF3 0x0F 0x2C,/R R32 XMM32
0xF3 0x48 0x0F 0x2C,/R R64 XMM32;
OPCODE CVTTSD2SI
0xF2 0x0F 0x2C,/R R32 XMM64
0xF2 0x48 0x0F 0x2C,/R R64 XMM64;
OPCODE CVTTPS2DQ 0xF3 0x0F 0x5B,/R XMM XMM128;
OPCODE CVTTPD2DQ 0x66 0x0F 0xE6,/R XMM XMM128;
OPCODE COMISS 0x0F 0x2F,/R XMM XMM32;
OPCODE COMISD 0x66 0x0F 0x2F,/R XMM XMM64;
OPCODE UCOMISS 0x0F 0x2E,/R XMM XMM32;
OPCODE UCOMISD 0x66 0x0F 0x2E,/R XMM XMM64;
OPCODE UNPCKLPS 0x0F 0x14,/R XMM XMM128;
OPCODE UNPCKLPD 0x66 0x0F 0x14,/R XMM XMM128;
OPCODE UNPCKHPS 0x0F 0x15,/R XMM XMM128;
OPCODE UNPCKHPD 0x66 0x0F 0x15,/R XMM XMM128;
OPCODE MASKMOVDQU 0x66 0x0F 0xF7,/R XMM XMM128; // 2nd arg kludge
OPCODE CMPSS 0xF3 0x0F 0xC2,/R IB XMM XMM32 IMM8;
OPCODE CMPSD_SSE 0xF2 0x0F 0xC2,/R IB XMM XMM64 IMM8;
OPCODE CMPPS 0x0F 0xC2, /R IB XMM XMM128 IMM8;
OPCODE CMPPD 0x66 0x0F 0xC2,/R IB XMM XMM128 IMM8;
OPCODE SHUFPS 0x0F 0xC6,/R IB XMM XMM128 IMM8;
OPCODE SHUFPD 0x66 0x0F 0xC6,/R IB XMM XMM128 IMM8;
OPCODE PSHUFB 0x66 0x0F 0x38 0x00,/R XMM XMM128;
OPCODE PSHUFD 0x66 0x0F 0x70,/R IB XMM XMM128 IMM8;
OPCODE PSHUFLW 0xF2 0x0F 0x70,/R IB XMM XMM128 IMM8;
OPCODE PSHUFHW 0xF3 0x0F 0x70,/R IB XMM XMM128 IMM8;
OPCODE ROUNDSS 0x66 0x0F 0x3A 0x0A,/R IB XMM XMM32 IMM8;
OPCODE ROUNDSD 0x66 0x0F 0x3A 0x0B,/R IB XMM XMM64 IMM8;
OPCODE ROUNDPS 0x66 0x0F 0x3A 0x08,/R IB XMM XMM128 IMM8;
OPCODE ROUNDPD 0x66 0x0F 0x3A 0x09,/R IB XMM XMM128 IMM8;
OPCODE BLENDVPS 0x66 0x0F 0x38 0x14,/R XMM XMM128;
OPCODE BLENDVPD 0x66 0x0F 0x38 0x15,/R XMM XMM128;
OPCODE BLENDPS 0x66 0x0F 0x3A 0x0C,/R IB XMM XMM128 IMM8;
OPCODE BLENDPD 0x66 0x0F 0x3A 0x0D,/R IB XMM XMM128 IMM8;
OPCODE PBLENDW 0x66 0x0F 0x3A 0x0E,/R IB XMM XMM128 IMM8;
OPCODE DPPS 0x66 0x0F 0x3A 0x40,/R IB XMM XMM128 IMM8;
OPCODE DPPD 0x66 0x0F 0x3A 0x41,/R IB XMM XMM128 IMM8;
OPCODE PALIGNR 0x66 0x0F 0x3A 0x0F,/R IB XMM XMM128 IMM8;
OPCODE PCLMULQDQ 0x66 0x0F 0x3A 0x44,/R IB XMM XMM128 IMM8;
OPCODE PEXTRB 0x66 0x0F 0x3A 0x14,/R IB RM8 XMM IMM8;
OPCODE PEXTRW
0x66 0x0F 0x3A 0x15,/R IB RM16 XMM IMM8
0x66 0x0F 0xC5,/R IB R64 XMM IMM8;
OPCODE PEXTRD 0x66 0x0F 0x3A 0x16,/R IB RM32 XMM IMM8;
OPCODE PEXTRQ 0x66 0x48 0x0F 0x3A 0x16,/R IB RM64 XMM IMM8;
OPCODE EXTRACTPS 0x66 0x0F 0x3A 0x17,/R IB RM32 XMM IMM8;
OPCODE PINSRB 0x66 0x0F 0x3A 0x20,/R IB XMM RM8 IMM8; // RM8 is actually R32/M8 ?...
OPCODE PINSRW 0x66 0x0F 0xC4,/R IB XMM RM16 IMM8; // RM16 is actually R32/M16 ?...
OPCODE PINSRD 0x66 0x0F 0x3A 0x22,/R IB XMM RM32 IMM8;
OPCODE PINSRQ 0x66 0x48 0x0F 0x3A 0x22,/R IB XMM RM64 IMM8;
OPCODE PCMPESTRM 0x66 0x0F 0x3A 0x60,/R XMM XMM128 IMM8;
OPCODE PCMPESTRI 0x66 0x0F 0x3A 0x61,/R XMM XMM128 IMM8;
OPCODE PCMPISTRM 0x66 0x0F 0x3A 0x62,/R XMM XMM128 IMM8;
OPCODE PCMPISTRI 0x66 0x0F 0x3A 0x63,/R XMM XMM128 IMM8;
OPCODE PCMPGTB 0x66 0x0F 0x64,/R XMM XMM128;
OPCODE PCMPGTW 0x66 0x0F 0x65,/R XMM XMM128;
OPCODE PCMPGTD 0x66 0x0F 0x66,/R XMM XMM128;
OPCODE PCMPGTQ 0x66 0x0F 0x38 0x37,/R XMM XMM128;
OPCODE PCMPEQB 0x66 0x0F 0x74,/R XMM XMM128;
OPCODE PCMPEQW 0x66 0x0F 0x75,/R XMM XMM128;
OPCODE PCMPEQD 0x66 0x0F 0x76,/R XMM XMM128;
OPCODE PCMPEQQ 0x66 0x0F 0x38 0x29,/R XMM XMM128;
OPCODE PSRLW
0x66 0x0F 0xD1,/R XMM XMM128
0x66 0x0F 0x71,/2 IB XMM IMM8;
OPCODE PSRLD
0x66 0x0F 0xD2,/R XMM XMM128
0x66 0x0F 0x72,/2 IB XMM IMM8;
OPCODE PSRLQ
0x66 0x0F 0xD3,/R XMM XMM128
0x66 0x0F 0x73,/2 IB XMM IMM8;
OPCODE PSLLW
0x66 0x0F 0xF1,/R XMM XMM128
0x66 0x0F 0x71,/6 IB XMM IMM8;
OPCODE PSLLD
0x66 0x0F 0xF2,/R XMM XMM128
0x66 0x0F 0x72,/6 XMM IMM8;
OPCODE PSLLQ
0x66 0x0F 0xF3,/R XMM XMM128
0x66 0x0F 0x73,/6 XMM IMM8;
OPCODE PSRAW
0x66 0x0F 0xE1,/R XMM XMM128
0x66 0x0F 0x71,/4 XMM IMM8;
OPCODE PSRAD
0x66 0x0F 0xE2,/R XMM XMM128
0x66 0x0F 0x72,/4 XMM IMM8;
OPCODE PAVGB 0x66 0x0F 0xE0,/R XMM XMM128;
OPCODE PAVGW 0x66 0x0F 0xE3,/R XMM XMM128;
OPCODE PABSB 0x66 0x0F 0x38 0x1C,/R XMM XMM128;
OPCODE PABSW 0x66 0x0F 0x38 0x1D,/R XMM XMM128;
OPCODE PABSD 0x66 0x0F 0x38 0x1E,/R XMM XMM128;
OPCODE PAND 0x66 0x0F 0xDB,/R XMM XMM128;
OPCODE PANDN 0x66 0x0F 0xDF,/R XMM XMM128;
OPCODE PHADDW 0x66 0x0F 0x38 0x01,/R XMM XMM128;
OPCODE PHADDD 0x66 0x0F 0x38 0x02,/R XMM XMM128;
OPCODE PHADDSW 0x66 0x0F 0x38 0x03,/R XMM XMM128;
OPCODE PADDUSB 0x66 0x0F 0xDC,/R XMM XMM128;
OPCODE PADDUSW 0x66 0x0F 0xDD,/R XMM XMM128;
OPCODE PADDSB 0x66 0x0F 0xEC,/R XMM XMM128;
OPCODE PADDSW 0x66 0x0F 0xED,/R XMM XMM128;
OPCODE PHSUBW 0x66 0x0F 0x38 0x05,/R XMM XMM128;
OPCODE PHSUBD 0x66 0x0F 0x38 0x06,/R XMM XMM128;
OPCODE PHSUBSW 0x66 0x0F 0x38 0x07,/R XMM XMM128;
OPCODE PSUBUSB 0x66 0x0F 0xD8,/R XMM XMM128;
OPCODE PSUBUSW 0x66 0x0F 0xD9,/R XMM XMM128;
OPCODE PSUBSB 0x66 0x0F 0xE8,/R XMM XMM128;
OPCODE PSUBSW 0x66 0x0F 0xE9,/R XMM XMM128;
OPCODE PADDB 0x66 0x0F 0xFC,/R XMM XMM128;
OPCODE PADDW 0x66 0x0F 0xFD,/R XMM XMM128;
OPCODE PADDD 0x66 0x0F 0xFE,/R XMM XMM128;
OPCODE PADDQ 0x66 0x0F 0xD4,/R XMM XMM128;
OPCODE PSUBB 0x66 0x0F 0xF8,/R XMM XMM128;
OPCODE PSUBW 0x66 0x0F 0xF9,/R XMM XMM128;
OPCODE PSUBD 0x66 0x0F 0xFA,/R XMM XMM128;
OPCODE PSUBQ 0x66 0x0F 0xFB,/R XMM XMM128;
OPCODE PHMINPOSUW 0x66 0x0F 0x38 0x41,/R XMM XMM128;
OPCODE PMINUB 0x66 0x0F 0xDA,/R XMM XMM128;
OPCODE PMINUW 0x66 0x0F 0x38 0x3A,/R XMM XMM128;
OPCODE PMINUD 0x66 0x0F 0x38 0x3B,/R XMM XMM128;
OPCODE PMINSB 0x66 0x0F 0x38 0x38,/R XMM XMM128;
OPCODE PMINSW 0x66 0x0F 0xEA,/R XMM XMM128; // 0xEA; 0xFE is PADDD's opcode byte
OPCODE PMINSD 0x66 0x0F 0x38 0x39,/R XMM XMM128;
OPCODE PMAXUB 0x66 0x0F 0xDE,/R XMM XMM128;
OPCODE PMAXUW 0x66 0x0F 0x38 0x3E,/R XMM XMM128;
OPCODE PMAXUD 0x66 0x0F 0x38 0x3F,/R XMM XMM128; // 0x0F 0x38 escape, same as PMAXUW/PMAXSD
OPCODE PMAXSB 0x66 0x0F 0x38 0x3C,/R XMM XMM128;
OPCODE PMAXSW 0x66 0x0F 0xEE,/R XMM XMM128;
OPCODE PMAXSD 0x66 0x0F 0x38 0x3D,/R XMM XMM128;
OPCODE PMULLW 0x66 0x0F 0xD5,/R XMM XMM128;
OPCODE PMULLD 0x66 0x0F 0x38 0x40,/R XMM XMM128;
OPCODE PMULHRSW 0x66 0x0F 0x38 0x0B,/R XMM XMM128;
OPCODE PMULHUW 0x66 0x0F 0xE4,/R XMM XMM128;
OPCODE PMULHW 0x66 0x0F 0xE5,/R XMM XMM128;
OPCODE PMULUDQ 0x66 0x0F 0xF4,/R XMM XMM128;
OPCODE PMULDQ 0x66 0x0F 0x38 0x28,/R XMM XMM128;
OPCODE PMADDWD 0x66 0x0F 0xF5,/R XMM XMM128;
OPCODE PMADDUBSW 0x66 0x0F 0x38 0x04,/R XMM XMM128;
OPCODE PTEST 0x66 0x0F 0x38 0x17,/R XMM XMM128;
OPCODE PSLLDQ 0x66 0x0F 0x73,/7 XMM IMM8;
OPCODE PSRLDQ 0x66 0x0F 0x73,/3 XMM IMM8;
OPCODE PSIGNB 0x66 0x0F 0x38 0x08,/R XMM XMM128;
OPCODE PSIGNW 0x66 0x0F 0x38 0x09,/R XMM XMM128;
OPCODE PSIGND 0x66 0x0F 0x38 0x0A,/R XMM XMM128;
OPCODE PXOR 0x66 0x0F 0xEF,/R XMM XMM128;
OPCODE PACKSSWB 0x66 0x0F 0x63,/R XMM XMM128;
OPCODE PACKUSWB 0x66 0x0F 0x67,/R XMM XMM128;
OPCODE PACKSSDW 0x66 0x0F 0x6B,/R XMM XMM128;
OPCODE PACKUSDW 0x66 0x0F 0x38 0x2B,/R XMM XMM128;
OPCODE PUNPCKLBW 0x66 0x0F 0x60,/R XMM XMM128;
OPCODE PUNPCKLWD 0x66 0x0F 0x61,/R XMM XMM128;
OPCODE PUNPCKLDQ 0x66 0x0F 0x62,/R XMM XMM128;
OPCODE PUNPCKLQDQ 0x66 0x0F 0x6C,/R XMM XMM128;
OPCODE PUNPCKHBW 0x66 0x0F 0x68,/R XMM XMM128;
OPCODE PUNPCKHWD 0x66 0x0F 0x69,/R XMM XMM128;
OPCODE PUNPCKHDQ 0x66 0x0F 0x6A,/R XMM XMM128;
OPCODE PUNPCKHQDQ 0x66 0x0F 0x6D,/R XMM XMM128;
OPCODE PSADBW 0x66 0x0F 0xF6,/R XMM XMM128;
OPCODE MPSADBW 0x66 0x0F 0x3A 0x42,/R IB XMM XMM128 IMM8;
OPCODE INSERTPS 0x66 0x0F 0x3A 0x21,/R IB XMM XMM32 IMM8;
OPCODE PREFETCHT0 0x0F 0x18,/1 M8;
OPCODE PREFETCHT1 0x0F 0x18,/2 M8;
OPCODE PREFETCHT2 0x0F 0x18,/3 M8;
OPCODE PREFETCHNTA 0x0F 0x18,/0 M8;
OPCODE MOV
// 0xA0, AL MOFFS8
0xA1, 16 AX MOFFS16

View file

@ -285,14 +285,14 @@ U0 UAsmHashLoad()
U0 Ui(U8 *buf, U8 **_rip, I64 seg_size=64, I64 *_jmp_dst=NULL, Bool just_ins=FALSE)
{//Unassembles one instruction
I64 i, disp, imm, opsize, opadd, arg1, arg2, reloced_arg1, reloced_arg2,
arg1_size = 0, arg2_size = 0, reloced_arg1_size, reloced_arg2_size,
I64 i, disp, imm, opsize, opadd, arg1, arg2, arg3, reloced_arg1, reloced_arg2,
arg1_size = 0, arg2_size = 0, arg3_size = 0, reloced_arg1_size, reloced_arg2_size,
ModrM = -1, SIB = -1, scale, r1, r2, Mod = -1, RM1 = -1, RM2 = -1, REX = -1, REX_r = 0, REX_x = 0, REX_b = 0;
Bool cont, isXMM, isXMM1, isXMM2;
CInst *tmpins, *tmpins2;
CHashOpcode *tmpo;
U8 *rip = *_rip, *ptr, *reloced_arg1_st, *reloced_arg2_st, *bin_data_area1, *bin_data_area2,
line1[512], line2[512], buf2[512], arg1_st[512], arg2_st[512], seg_overrides[32];
line1[512], line2[512], buf2[512], arg1_st[512], arg2_st[512], arg3_st[512], seg_overrides[32];
if (_jmp_dst)
*_jmp_dst = -1;
@ -313,6 +313,8 @@ U0 Ui(U8 *buf, U8 **_rip, I64 seg_size=64, I64 *_jmp_dst=NULL, Bool just_ins=FAL
}
*arg1_st = 0;
*arg2_st = 0;
*arg3_st = 0;
if (!IsRaw && PutSrcLink(rip, 1, line1))
CatPrint(line1, "\n");
else
@ -366,6 +368,38 @@ U0 Ui(U8 *buf, U8 **_rip, I64 seg_size=64, I64 *_jmp_dst=NULL, Bool just_ins=FAL
break;
case OC_OP_SIZE_PREFIX:
// OC_OP_SIZE_PREFIX (0x66) legacy byte is ignored if it's an SSE instruction, checking early here.
tmpins = InstEntryFind(rip, opsize, seg_size);
for (i = 0; i < tmpins->opcode_count; i++)
{
if (tmpins->opcode[i] != rip[i])
{
// ST_ERR_ST "$$HL,0$$\nBAD MATCH SSE check #1! %02X with %02X\n", tmpins->opcode[i], rip[i];
// D(tmpins, sizeof(CInst));"$$HL,1$$";
if (opsize == 32 && seg_size == 64)
{
tmpins2 = InstEntryFind(rip, 64, seg_size);
if (tmpins2 != tmpins)
tmpins = tmpins2;
}
break; // leave for loop
}
}
if (ARGT_XMM <= tmpins->arg1 <= ARGT_XMM0 ||
ARGT_XMM <= tmpins->arg2 <= ARGT_XMM0)
{
for (i = 0; i < tmpins->opcode_count; i++)
{
if (tmpins->opcode[i] != rip[i])
{
ST_ERR_ST "$$HL,0$$BAD MATCH SSE check #2! %02X with %02X\n", tmpins->opcode[i], rip[i];
// D(tmpins, sizeof(CInst));"\n$$HL,1$$";
goto sse_check_fail; // evaluate inst op with RIP. if not identical, not a match.
}
}
goto sse_check_jump; // if match, jump to processing. else, treat as legacy prefix.
}
sse_check_fail:
if (opsize == 16)
opsize = 32;
else
@ -407,15 +441,16 @@ U0 Ui(U8 *buf, U8 **_rip, I64 seg_size=64, I64 *_jmp_dst=NULL, Bool just_ins=FAL
rip[1] >> 3 & 7 == rip[1] & 7)
tmpins = tmpins2;
}
sse_check_jump:
rip += tmpins->opcode_count;
tmpo = tmpins(U8 *) - tmpins->ins_entry_num * sizeof(CInst) - offset(CHashOpcode.ins);
if (just_ins)
*line1 = 0;
CatPrint(line1, tmpo->str);
arg1 = tmpins->arg1;
arg2 = tmpins->arg2;
arg3 = tmpins->arg3;
if (arg1_size = tmpins->size1)
{
@ -432,6 +467,13 @@ U0 Ui(U8 *buf, U8 **_rip, I64 seg_size=64, I64 *_jmp_dst=NULL, Bool just_ins=FAL
else
CatPrint(arg2_st, "U%d ", arg2_size);
}
if (arg3_size = tmpins->size3)
{
if (Bt(&uasm.signed_arg_mask, arg3))
CatPrint(arg3_st, "I%d ", arg3_size);
else
CatPrint(arg3_st, "U%d ", arg3_size);
}
if (tmpins->flags & IEF_PLUS_OPCODE)
{
@ -511,7 +553,8 @@ U0 Ui(U8 *buf, U8 **_rip, I64 seg_size=64, I64 *_jmp_dst=NULL, Bool just_ins=FAL
RM2 = ModrM >> 3 & 7 + REX_r;
if (Mod < 3 && RM1 & 7 == 4 && !isXMM) // SSE instructions have lower bit clear and doesn't mean SIB
SIB = *rip++;
if (Mod == 1) {
if (Mod == 1)
{
disp = *rip(U8 *)++;
CatPrint(reloced_arg1_st, "0x%02X", disp);
}
@ -705,7 +748,6 @@ U0 Ui(U8 *buf, U8 **_rip, I64 seg_size=64, I64 *_jmp_dst=NULL, Bool just_ins=FAL
CatPrint(arg1_st, "%Z", *rip++ - tmpins->opcode[tmpins->opcode_count - 1], "ST_FSTACK_REGS");
break;
}
switch (arg2)
{
case ARGT_IMM8:
@ -749,16 +791,27 @@ U0 Ui(U8 *buf, U8 **_rip, I64 seg_size=64, I64 *_jmp_dst=NULL, Bool just_ins=FAL
CatPrint(arg2_st, "%Z", *rip++ - tmpins->opcode[tmpins->opcode_count - 1], "ST_FSTACK_REGS");
break;
}
switch (arg3)
{
case ARGT_IMM8:
case ARGT_UIMM8:
CatPrint(arg3_st, "0x%02X", *rip(U8 *)++);
break;
}
if (tmpins->flags & IEF_ENDING_ZERO)
rip++;
if (*arg1_st)
{
CatPrint(line1, "%h*c", 6 - StrLen(tmpo->str), ' ');
CatPrint(line1, "%h*c", 12 - StrLen(tmpo->str), ' ');
CatPrint(line1, " %s", arg1_st);
}
if (*arg2_st)
CatPrint(line1, ", %s", arg2_st);
if (*arg3_st)
CatPrint(line1, ", %s", arg3_st);
CatPrint(line1, "\n");
CatPrint(line2, "\n");
if (!just_ins)

View file

@ -1,13 +1,20 @@
$WW,1$$FG,5$$TX+CX,"ChangeLog"$$FG$
$IV,1$----10/18/21 01:16:10----$IV,0$
* Raised version number to 1.03.
* Increased $LK+PU,"CInst",A="MN:CInst"$ opcode size from 4 to 8.
* $MA-X+PU,"Added 3 operand support to assembler.",LM="Find(\"arg3\", \"::/Compiler/\");View;"$
* Added almost all SSE instructions to OpCodes.DD. SSE instructions operating on MMX registers were not implemented.
* Fixed $LK+PU,"0x66 prefix unassembly opcode reporting",A="FF:::/Compiler/UAsm.CC,OC_OP_SIZE_PREFIX"$; the assembler now $LK+PU,"prevents duplicate REX",A="FF:::/Compiler/Asm.CC,tmpins->opcode[0] == 0x66"$ for prefixed SSE instructions. The tradeoff for fixing this is that 0x66-, 0xF2-, or 0xF3-prefixed SSE opcodes can only use XMM0-XMM7 and RAX-RDI.
$IV,1$----10/09/21 04:05:32----$IV,0$
* Add MOVQ, PMOVMSKB, MINPS, MAXPS, XORPS, CVTSI2SD, and CVTSD2SI assembly opcodes to OpCodes.DD. $TX+IV,"Note"$: Assembler adds extra REX 0x48 byte to SSE instructions using 64bit registers. Using ` opcode flag only disables prefixing REX 0x48 when using lower registers; TODO modify assembler to fix this for SSE instructions, or make new OpCodes flag to never prefix REX 0x48.
* Added MOVQ, PMOVMSKB, MINPS, MAXPS, XORPS, CVTSI2SD, and CVTSD2SI assembly opcodes to OpCodes.DD.
$IV,1$----10/07/21 18:06:15----$IV,0$
* Raised version number to 1.02.
* Fix Std Font characters after char 128.
* Fixed Std Font characters after char 128.
* Added links to ZealOS-specific applications in PersonalMenu.
* Removed all files from ::/Downloads.
* Started initial work integrating SSE instructions into assembler. $MA-X+PU,"See changes here.",LM="Find(\"XMM\", \"::/Compiler/OpCodes.DD\", \"-i\");\"\n\n\";Find(\"XMM\", \"::/Compiler/Asm.CC\", \"-i\");\"\n\";Find(\" == 128\", \"::/Compiler/UAsm.CC\", \"-i\");\"\n\";Find(\"XMM\", \"::/Compiler/UAsm.CC\", \"-i\");\"\n\";Find(\"size_arg_mask[16]\", \"::/Compiler/AsmInit.CC\", \"-i\");\"\n\";Find(\"size_arg_mask[0] = 0xF\", \"::/Compiler/AsmInit.CC\", \"-i\");\"\n\";Find(\"arg_mask[17\", \"::/Kernel/KernelA.HH\", \"-i\");\"\n\";Find(\"SSE\", \"::/Kernel/MultiProc.CC\", \"-i\");View;"$ Only added 2 arg opcodes and only added instructions <= 4 bytes, compiler and assembler must first be expanded upon to support opcodes with more than two args and instructions larger than 4 bytes. $TX+IV,"Current bugs"$: unassembling 0x66 prefixed opcodes reports wrong opcode, unassembling SSE instructions that use non-128-bit vars will incorrectly report U128, MOVHLPS and MOVLHPS 2nd operand is defined XMM128 to hack ModrM order but operands should be XMM XMM, due to MOVSD name clash we define opcode as MOVSD_SSE. $TX+IV,"TOD"$$TX+IV,"O"$: add CosmiC U128 and XMM register allocation support, find way to ensure 16-byte aligned stack and aligned instructions.
* Started initial work integrating SSE instructions into assembler. $MA-X+PU,"See changes here.",LM="Find(\"XMM\", \"::/Compiler/OpCodes.DD\", \"-i\");\"\n\n\";Find(\"XMM\", \"::/Compiler/Asm.CC\", \"-i\");\"\n\";Find(\" == 128\", \"::/Compiler/UAsm.CC\", \"-i\");\"\n\";Find(\"XMM\", \"::/Compiler/UAsm.CC\", \"-i\");\"\n\";Find(\"size_arg_mask[16]\", \"::/Compiler/AsmInit.CC\", \"-i\");\"\n\";Find(\"size_arg_mask[0] = 0xF\", \"::/Compiler/AsmInit.CC\", \"-i\");\"\n\";Find(\"arg_mask[17\", \"::/Kernel/KernelA.HH\", \"-i\");\"\n\";Find(\"SSE\", \"::/Kernel/MultiProc.CC\", \"-i\");View;"$ Only added 2 arg opcodes and only added instructions <= 4 bytes. $TX+IV,"Current bugs"$: unassembling SSE instructions that use non-128-bit vars will incorrectly report U128, MOVHLPS and MOVLHPS 2nd operand is defined XMM128 to hack ModrM order but operands should be XMM XMM, due to MOVSD name clash we define opcode as MOVSD_SSE. $TX+IV,"TODO"$: add CosmiC U128 and XMM register allocation support, find way to ensure 16-byte aligned stack and aligned instructions.
$IV,1$----09/21/21 23:35:00----$IV,0$
* Raised version number to 1.01.
@ -140,7 +147,6 @@ $IV,1$----07/05/21 14:58:59----$IV,0$
* Allow OS Upgrade feature on VMs.
$IV,1$----07/03/21 00:01:32----$IV,0$
* Raised version number to 0.05.
* Fixed HTML Demo, implemented it into DoDistro process. Outputs to ::/HTML/.

View file

@ -1,6 +1,6 @@
$WW,1$* You can adjust the mouse movement rate by setting global variables in your start-up file. See $LK,"mouse scale",A="FF:~/HomeLocalize.CC,mouse_hard.scale"$.
* You can set your local time zone by setting the $FG,4$local_time_offset$FG$ global variable in a start-up file. It's units are $LK,"CDATE_FREQ",A="MN:CDATE_FREQ"$. See $LK,"local time",A="FF:~/HomeLocalize.CC,local_time"$.
* You can set your local time zone by setting the $FG,4$local_time_offset$FG$ global variable in a start-up file. Its units are $LK,"CDATE_FREQ",A="MN:CDATE_FREQ"$. See $LK,"local time",A="FF:~/HomeLocalize.CC,local_time"$.
* You can change the palette using the $LK+PU,"PaletteSet",A="FF:::/System/Gr/GrPalette.CC,U0 PaletteSet"$ functions. Pressing $FG,2$<CTRL-ALT-f>$FG$ toggles standard and VGA ROM font.

View file

@ -13,7 +13,7 @@ $WW,1$$FG,5$$TX+CX,"Tips"$$FG$
* If you make changes to ZealOS files in your $FG,2$/Home$FG$ directory, generally you reboot to make them take effect. (You don't compile anything.) You should have two ZealOS partitions on your hard drive because a syntax error in a start-up file will make the partition unbootable. Boot to the second partition or boot to a standard ZealOS CD/DVD and use $LK,"Mount",A="MN:Mount"$() to mount your hard drive.
* You can periodically copy your files to a mirrored, identical partition with $LK,"CopyTree",A="MN:CopyTree"$() commands in scripts. You can do merge commands with a menu entry like this:
$FG,2$Merge(\"C:/*\",\"D:/*\",\"+r+d\");$FG$ to check your changes.
$FG,2$Merge("C:/","D:/","+r+d");$FG$ to check your changes.
* $FG,2$<CTRL-m>$FG$ at the cmd line to access your PersonalMenu. Place macros there with $FG,2$<CTRL-l>$FG$, or icon-like sprites with $FG,2$<CTRL-r>$FG$. Use the $FG,2$Pop-Up$FG$ option on macros to $LK,"Spawn",A="MN:Spawn"$() a task to run a file. It dies when it is finished. This returns mem to the system. Be sure to press $FG,2$<CTRL-s>$FG$ to save your macro/menu area after making changes.
@ -23,7 +23,7 @@ $FG,2$Merge(\"C:/*\",\"D:/*\",\"+r+d\");$FG$ to check your changes.
* You can adjust the mouse movement rate by setting global variables in your start-up file. See $LK,"mouse scale",A="FF:~/HomeLocalize.CC,mouse_hard.scale"$.
* You can set your local time zone by setting the $LK,"local_time_offset",A="MN:local_time_offset"$ global variable in a start-up file. It's units are $LK,"CDATE_FREQ",A="MN:CDATE_FREQ"$. See $LK,"local time",A="FF:~/HomeLocalize.CC,local_time"$.
* You can set your local time zone by setting the $LK,"local_time_offset",A="MN:local_time_offset"$ global variable in a start-up file. Its units are $LK,"CDATE_FREQ",A="MN:CDATE_FREQ"$. See $LK,"local time",A="FF:~/HomeLocalize.CC,local_time"$.
* $FG,2$<CTRL-SHIFT-L>$FG$ in the editor to reindent a $LK,"CosmiC",A="FI:::/Doc/CosmiC.DD"$ function or renumber an asm routine's local labels.

389
src/Home/SSE.CC Executable file
View file

@ -0,0 +1,389 @@
/*
The CosmiC assembler currently has partial SSE support.
SSE instructions with no prefix are fully supported,
but instructions prefixed with 0x66, 0xF2, or 0xF3
can only be assembled using the lower registers,
XMM0-XMM7 and RAX-RDI.
SSE instructions supporting XMM0-XMM15 RAX-R15:
_________________________________________________
MOVAPS MOVUPS MOVLPS MOVHPS MOVLHPS MOVHLPS
MOVNTI MOVNTPS MOVMSKPS
RCPPS ANDPS ANDNPS ADDPS SUBPS MULPS
DIVPS MINPS MAXPS ORPS XORPS SQRTPS
RSQRTPS CMPPS SHUFPS
CVTPS2PD CVTDQ2PS CVTPI2PS
COMISS UCOMISS UNPCKLPS UNPCKHPS
_________________________________________________
SSE instructions not in the above list most likely
can only be assembled using XMM0-XMM7 RAX-RDI.
*/
// DemoAllSSE: a listing of SSE-family instructions to assemble and then
// unassemble, so the two can be compared line by line.
// Takes no arguments and returns res.
I64 DemoAllSSE()
{// Test fixture only: not meant to be run.
// Exists so the Assembler and Unassembler can be checked against it.
// Will likely cause a General Protection crash if run.
// Some SSE ops require 16-byte aligned values or else crash.
// res is pinned to RDX and set to 0; the listing uses [RDX] as its memory operand.
I64 reg RDX res = 0;
asm
{
// Memory fences and MXCSR load/store.
LFENCE
MFENCE
SFENCE
LDMXCSR [RDX]
STMXCSR [RDX]
// Data movement: loads, register moves, non-temporal stores, mask extraction.
LDDQU XMM0, [RDX]
MOVAPS XMM0, XMM15
MOVAPD XMM0, XMM1
MOVUPS XMM0, XMM15
MOVUPD XMM0, XMM1
MOVSS XMM0, XMM1
MOVSD_SSE XMM0, XMM1
MOVD XMM0, ESI
MOVQ XMM0, RDX
MOVLPS XMM15, [RDX]
MOVLPD XMM0, [RDX]
MOVHPS XMM0, [RDX]
MOVHPD XMM0, [RDX]
MOVDQA XMM0, XMM1
MOVDQU XMM0, XMM1
MOVDDUP XMM0, XMM1
MOVSLDUP XMM0, XMM1
MOVSHDUP XMM0, XMM1
MOVLHPS XMM0, XMM15
MOVHLPS XMM0, XMM15
MOVNTI [RDX], R13
// NOTE(review): the next three are disabled; the reason is not recorded here.
// MOVNTPS [RDX], XMM0
// MOVNTPD [RDX], XMM0
// MOVNTDQ [RDX], XMM1
MOVMSKPS RDX, XMM15
MOVMSKPD RDX, XMM0
PMOVMSKB RDX, XMM0
// Packed sign-extending (PMOVSX*) and zero-extending (PMOVZX*) moves.
// NOTE(review): the BQ variants are disabled; the reason is not recorded here.
PMOVSXBW XMM0, XMM1
PMOVSXBD XMM0, XMM1
// PMOVSXBQ XMM0, XMM1
PMOVSXWD XMM0, XMM1
PMOVSXWQ XMM0, XMM1
PMOVSXDQ XMM0, XMM1
PMOVZXBW XMM0, XMM1
PMOVZXBD XMM0, XMM1
// PMOVZXBQ XMM0, XMM1
PMOVZXWD XMM0, XMM1
PMOVZXWQ XMM0, XMM1
PMOVZXDQ XMM0, XMM1
// Horizontal add/subtract and alternating add-subtract.
HADDPS XMM0, XMM1
HADDPD XMM0, XMM1
HSUBPS XMM0, XMM1
HSUBPD XMM0, XMM1
ADDSUBPS XMM0, XMM1
ADDSUBPD XMM0, XMM1
// Scalar and packed floating-point arithmetic, bitwise logic, min/max, square roots.
// The unprefixed PS forms use XMM15 as the source to exercise the high registers.
RCPSS XMM0, XMM1
RCPPS XMM0, XMM15
ANDPS XMM0, XMM15
ANDPD XMM0, XMM1
ANDNPS XMM0, XMM15
ANDNPD XMM0, XMM1
ADDSS XMM0, XMM1
ADDSD XMM0, XMM1
ADDPS XMM0, XMM15
ADDPD XMM0, XMM1
SUBSS XMM0, XMM1
SUBSD XMM0, XMM1
SUBPS XMM0, XMM15
SUBPD XMM0, XMM1
MULSS XMM0, XMM1
MULSD XMM0, XMM1
MULPS XMM0, XMM15
MULPD XMM0, XMM1
DIVSS XMM0, XMM0
DIVSD XMM0, XMM1
DIVPS XMM0, XMM15
DIVPD XMM0, XMM1
MINSS XMM0, XMM1
MINSD XMM0, XMM1
MINPS XMM0, XMM15
MINPD XMM0, XMM1
MAXSS XMM0, XMM1
MAXSD XMM0, XMM1
MAXPS XMM0, XMM15
MAXPD XMM0, XMM1
ORPS XMM0, XMM15
ORPD XMM0, XMM1
XORPS XMM0, XMM15
XORPD XMM0, XMM1
SQRTSS XMM0, XMM1
SQRTSD XMM0, XMM1
SQRTPS XMM0, XMM15
SQRTPD XMM0, XMM1
RSQRTSS XMM0, XMM1
RSQRTPS XMM0, XMM15
// Conversions between integer, single-precision and double-precision forms.
CVTSD2SS XMM0, XMM1
CVTSS2SD XMM0, XMM1
CVTSS2SI RDX, XMM0
CVTSI2SD XMM0, RDX
CVTSI2SS XMM0, RDX
CVTSD2SI RDX, XMM1
CVTPS2PD XMM0, XMM15
CVTDQ2PS XMM0, XMM15
CVTPS2DQ XMM0, XMM1
CVTPD2DQ XMM0, XMM1
CVTDQ2PD XMM0, XMM1
// NOTE(review): CVTPD2PS is not in this file's header list of instructions
// that support XMM8-XMM15; confirm XMM9 assembles as intended here.
CVTPD2PS XMM0, XMM9
CVTPI2PS XMM15, [RDX]
CVTPI2PD XMM0, [RDX]
CVTTSS2SI RDX, XMM1
CVTTSD2SI RDX, XMM0
CVTTPS2DQ XMM0, XMM1
CVTTPD2DQ XMM0, XMM1
// Scalar compares and floating-point unpacks.
COMISS XMM0, XMM15
COMISD XMM0, XMM1
UCOMISS XMM0, XMM15
UCOMISD XMM0, XMM1
UNPCKLPS XMM0, XMM15
UNPCKLPD XMM0, XMM1
UNPCKHPS XMM0, XMM15
UNPCKHPD XMM0, XMM1
MASKMOVDQU XMM0, XMM1
// Mostly three-operand forms taking an immediate as the third operand:
// compares, shuffles, rounding, blends, dot products, extract/insert, string compares.
CMPSS XMM0, XMM1, 0x74
CMPSD_SSE XMM0, XMM1, 0x63
CMPPS XMM0, XMM15, 0x52
CMPPD XMM0, XMM1, 0x41
SHUFPS XMM0, XMM15, 0x30
SHUFPD XMM0, XMM1, 0x29
PSHUFB XMM0, XMM1
PSHUFD XMM0, XMM1, 0x18
PSHUFLW XMM0, XMM1, 0x07
PSHUFHW XMM0, XMM1, 0x96
ROUNDSS XMM0, XMM1, 0x85
ROUNDSD XMM0, XMM1, 0x74
ROUNDPS XMM0, XMM1, 0x63
ROUNDPD XMM0, XMM1, 0x52
BLENDVPS XMM0, XMM1
BLENDVPD XMM0, XMM1
BLENDPS XMM0, XMM1, 0x99
BLENDPD XMM0, XMM1, 0x99
PBLENDW XMM0, XMM1, 0x99
DPPS XMM0, XMM1, 0x99
DPPD XMM0, XMM1, 0x99
PALIGNR XMM0, XMM1, 0x99
PCLMULQDQ XMM0, XMM1, 0x99
PEXTRB AH, XMM1, 0x99
PEXTRW RDX, XMM1, 0x99
PEXTRD ESI, XMM1, 0x99
PEXTRQ RDX, XMM1, 0x99
EXTRACTPS ESI, XMM1, 0x99
PINSRB XMM0, AH, 0x99
PINSRW XMM0, AX, 0x99
PINSRD XMM0, ESI, 0x99
PINSRQ XMM0, RDX, 0x99
PCMPESTRM XMM0, XMM1, 0x99
PCMPESTRI XMM0, XMM1, 0x99
PCMPISTRM XMM0, XMM1, 0x99
PCMPISTRI XMM0, XMM1, 0x99
// Packed integer compares, shifts, averages, absolute values and logic.
PCMPGTB XMM0, XMM1
PCMPGTW XMM0, XMM1
PCMPGTD XMM0, XMM1
PCMPGTQ XMM0, XMM1
PCMPEQB XMM0, XMM1
PCMPEQW XMM0, XMM1
PCMPEQD XMM0, XMM1
PCMPEQQ XMM0, XMM1
PSRLW XMM0, XMM1
PSRLD XMM0, XMM1
PSRLQ XMM0, XMM1
PSLLW XMM0, XMM1
PSLLD XMM0, XMM1
PSLLQ XMM0, XMM1
PSRAW XMM0, XMM1
PSRAD XMM0, XMM1
PAVGB XMM0, XMM1
PAVGW XMM0, XMM1
PABSB XMM0, XMM1
PABSW XMM0, XMM1
PABSD XMM0, XMM1
PAND XMM0, XMM1
PANDN XMM0, XMM1
// Packed integer add/subtract: horizontal, saturating and wrapping forms.
PHADDW XMM0, XMM1
PHADDD XMM0, XMM1
PHADDSW XMM0, XMM1
PADDUSB XMM0, XMM1
PADDUSW XMM0, XMM1
PADDSB XMM0, XMM1
PADDSW XMM0, XMM1
PHSUBW XMM0, XMM1
PHSUBD XMM0, XMM1
PHSUBSW XMM0, XMM1
PSUBUSB XMM0, XMM1
PSUBUSW XMM0, XMM1
PSUBSB XMM0, XMM1
PSUBSW XMM0, XMM1
PADDB XMM0, XMM1
PADDW XMM0, XMM1
PADDD XMM0, XMM1
PADDQ XMM0, XMM1
PSUBB XMM0, XMM1
PSUBW XMM0, XMM1
PSUBD XMM0, XMM1
PSUBQ XMM0, XMM1
// Packed integer min/max and multiplies.
PHMINPOSUW XMM0, XMM1
PMINUB XMM0, XMM1
PMINUW XMM0, XMM1
PMINUD XMM0, XMM1
PMINSB XMM0, XMM1
PMINSW XMM0, XMM1
PMINSD XMM0, XMM1
PMAXUB XMM0, XMM1
PMAXUW XMM0, XMM1
PMAXUD XMM0, XMM1
PMAXSB XMM0, XMM1
PMAXSW XMM0, XMM1
PMAXSD XMM0, XMM1
PMULLW XMM0, XMM1
PMULLD XMM0, XMM1
PMULHRSW XMM0, XMM1
PMULHUW XMM0, XMM1
PMULHW XMM0, XMM1
PMULUDQ XMM0, XMM1
PMULDQ XMM0, XMM1
PMADDWD XMM0, XMM1
PMADDUBSW XMM0, XMM1
PTEST XMM0, XMM1
// Whole-register byte shifts: two operands, the second an immediate.
PSLLDQ XMM0, 0x11
PSRLDQ XMM0, 0x22
PSIGNB XMM0, XMM1
PSIGNW XMM0, XMM1
PSIGND XMM0, XMM1
PXOR XMM0, XMM1
// Packed integer pack/unpack and sum of absolute differences.
PACKSSWB XMM0, XMM1
PACKUSWB XMM0, XMM1
PACKSSDW XMM0, XMM1
PACKUSDW XMM0, XMM1
PUNPCKLBW XMM0, XMM1
PUNPCKLWD XMM0, XMM1
PUNPCKLDQ XMM0, XMM1
PUNPCKLQDQ XMM0, XMM1
PUNPCKHBW XMM0, XMM1
PUNPCKHWD XMM0, XMM1
PUNPCKHDQ XMM0, XMM1
PUNPCKHQDQ XMM0, XMM1
PSADBW XMM0, XMM1
MPSADBW XMM0, XMM1, 0x21
INSERTPS XMM0, XMM1, 0x32
// Prefetch hints.
PREFETCHT0 [RDX]
PREFETCHT1 [RDX]
PREFETCHT2 [RDX]
PREFETCHNTA [RDX]
}
return res;
}
// Print a heading, then unassemble DemoAllSSE so its listing can be
// compared by eye against the asm source for errors.
"\n$$BK,1$$Unassembling all SSE ops, note errors:\n$$BK,0$$";
Uf("DemoAllSSE");
// DumpXMM: print XMM0 through XMM7, one register per line, as 32 hex digits.
// Takes no arguments and returns nothing.
U0 DumpXMM()
{ // For each register: extract quadword 1 and print it after the "XMMn: 0x"
// label, then extract quadword 0 and print it followed by a newline.
// quad is pinned to RAX, so the value each PEXTRQ writes to RAX is what
// the print statement right after it reads.
I64 reg RAX quad;
asm {PEXTRQ RAX, XMM0, 1}
"XMM0: 0x%016X", quad;
asm {PEXTRQ RAX, XMM0, 0}
"%016X\n", quad;
asm {PEXTRQ RAX, XMM1, 1}
"XMM1: 0x%016X", quad;
asm {PEXTRQ RAX, XMM1, 0}
"%016X\n", quad;
asm {PEXTRQ RAX, XMM2, 1}
"XMM2: 0x%016X", quad;
asm {PEXTRQ RAX, XMM2, 0}
"%016X\n", quad;
asm {PEXTRQ RAX, XMM3, 1}
"XMM3: 0x%016X", quad;
asm {PEXTRQ RAX, XMM3, 0}
"%016X\n", quad;
asm {PEXTRQ RAX, XMM4, 1}
"XMM4: 0x%016X", quad;
asm {PEXTRQ RAX, XMM4, 0}
"%016X\n", quad;
asm {PEXTRQ RAX, XMM5, 1}
"XMM5: 0x%016X", quad;
asm {PEXTRQ RAX, XMM5, 0}
"%016X\n", quad;
asm {PEXTRQ RAX, XMM6, 1}
"XMM6: 0x%016X", quad;
asm {PEXTRQ RAX, XMM6, 0}
"%016X\n", quad;
asm {PEXTRQ RAX, XMM7, 1}
"XMM7: 0x%016X", quad;
asm {PEXTRQ RAX, XMM7, 0}
"%016X\n", quad;
// Blank lines to separate consecutive dumps.
"\n\n";
}
// Print a heading, unassemble DumpXMM, then print spacing before the next section.
"\n\nDump XMM Registers function definition:\n";
Uf("DumpXMM");
"\n\n";
// DemoSSE: runnable demo. Loads constants into several XMM registers, dumps
// them, applies a few SSE operations, then dumps them again.
// Takes no arguments and returns res.
I64 DemoSSE()
{
// res is pinned to RDX, and the asm below uses RDX as a scratch register.
// NOTE(review): the returned value is therefore presumably not the initial 0;
// confirm whether callers rely on it.
I64 reg RDX res = 0;
asm
{
// Load a 64-bit constant into RDX, then move it into an XMM register.
// XMM4 and XMM5 are not loaded here, so the dumps show whatever they already held.
MOV RDX, 0x3939393939393939
MOVQ XMM0, RDX
MOV RDX, 0x7777777777777777
MOVQ XMM1, RDX
MOV RDX, 0x2021202120212021
MOVQ XMM2, RDX
MOV RDX, 0x0123456789012345
MOVQ XMM3, RDX
MOV RDX, 0x0000400000005000
MOVQ XMM6, RDX
MOV RDX, 0x0000000300000002
MOVQ XMM7, RDX
}
// First dump: register state before the operations.
DumpXMM;
asm
{
// Byte-shift XMM0-XMM3 left by 8 bytes, moving each low quadword to the high half.
PSLLDQ XMM0, 8
PSLLDQ XMM1, 8
PSLLDQ XMM2, 8
PSLLDQ XMM3, 8
// Horizontal add of adjacent dwords from XMM6 and XMM7; the result goes to XMM6.
PHADDD XMM6, XMM7
}
// Second dump: register state after the operations.
DumpXMM;
return res;
}
// Print a heading, unassemble DemoSSE, then run it so both register dumps are printed.
"\n$$BK,1$$Unassembling and running SSE demo.\n$$BK,0$$";
Uf("DemoSSE"); "\n";
DemoSSE;

View file

@ -13,7 +13,7 @@ CTask *sys_winmgr_task,
U8 *rev_bits_table; //Table with U8 bits reversed
CDate local_time_offset;
F64 *pow10_I64,
sys_os_version = 1.02;
sys_os_version = 1.03;
CAutoCompleteDictGlobals acd;
CAutoCompleteGlobals ac;

View file

@ -1890,11 +1890,11 @@ class CInst
{
U8 ins_entry_num, //This entry num in opcode hash entry
opcode_count,
opcode[4];
opcode[8];
U16 flags;
U8 slash_val, uasm_slash_val, opcode_modifier,
arg1, arg2,
size1, size2,//Size in bits
arg1, arg2, arg3,
size1, size2, size3, //Size in bits
pad;
};
@ -1950,13 +1950,13 @@ class CAOTBinBlk
I64 class CAbsCountsI64
{
U16 abs_address, //Only odd/even matters. Count of absolute address in an exp.
c_address; //Only odd/even matters. Count of C address in an exp.
U16 abs_address, //Only odd/even matters. Count of absolute address in an expression.
c_address; //Only odd/even matters. Count of C address in an expression.
U32 externs; //Only nonzero matters. Some regions have externs banned.
};
class CAsmUndefHash
{//Only place created is $LK,"Exp Parser",A="FF:::/Compiler/ParseExp.CC,sizeof(CAsmUndefHash)"$ when an undef is found in an ASM exp.
{//Only place created is $LK,"Exp Parser",A="FF:::/Compiler/ParseExp.CC,sizeof(CAsmUndefHash)"$ when an undef is found in an ASM expression.
CAsmUndefHash *next;
CHashExport *hash;
};
@ -2083,12 +2083,12 @@ class CAsmUnresolvedRef
#define ARGT_MM 48 // Not implemented
#define ARGT_MM32 49 // Not implemented
#define ARGT_MM64 50 // Not implemented
#define ARGT_XMM 51 // Not implemented
#define ARGT_XMM 51
#define ARGT_XMM32 52 // Not implemented
#define ARGT_XMM64 53 // Not implemented
#define ARGT_XMM128 54 // Not implemented
#define ARGT_XMM0 55 // Not implemented
#define ARGT_XMM32 52
#define ARGT_XMM64 53
#define ARGT_XMM128 54
#define ARGT_XMM0 55
#help_index "Compiler/Internal;Hash/System"
#define OCF_ALIAS 1
@ -2193,7 +2193,7 @@ class CLexFile
class CAOTCtrl
{
I64 rip; //Instruction pointer
CAsmArg arg1, arg2;
CAsmArg arg1, arg2, arg3;
CAOTBinBlk *bin;
I64 num_bin_U8s,
max_align_bits, org;

View file

@ -15,7 +15,7 @@ U0 LoadDocDefines()
//$LK,"DD_BOOT_HIGH_LOC_DVD",A="FF:::/System/Boot/BootDVD.CC,DD_BOOT_HIGH_LOC_DVD"$
$TR,"LineRep"$
$ID,2$DefinePrint("DD_ZEALOS_LOC","94,836");
$ID,2$DefinePrint("DD_ZEALOS_LOC","95,188");
$ID,-2$
DefinePrint("DD_MP_VECT", "%08X", MP_VECT_ADDR);
DefinePrint("DD_MP_VECT_END", "%08X", MP_VECT_ADDR + COREAP_16BIT_INIT_END - COREAP_16BIT_INIT - 1);