ref: 213bf5089365d00d9d40635bcfe62e197d548c1b
parent: e3883b050e1784f97bd6474c6af73023fe3bbe44
author: jpathy <[email protected]>
date: Tue May 21 19:15:13 EDT 2013
add 6(a|l) sse support to 8(a|l)
--- a/sys/src/cmd/8a/a.y
+++ b/sys/src/cmd/8a/a.y
@@ -20,9 +20,9 @@
%left '+' '-'
%left '*' '/' '%'
%token <lval> LTYPE0 LTYPE1 LTYPE2 LTYPE3 LTYPE4
-%token <lval> LTYPEC LTYPED LTYPEN LTYPER LTYPET LTYPES LTYPEM LTYPEI LTYPEG
+%token <lval> LTYPEC LTYPED LTYPEN LTYPER LTYPET LTYPES LTYPEM LTYPEI LTYPEG LTYPEXC LTYPEX
%token <lval> LCONST LFP LPC LSB
-%token <lval> LBREG LLREG LSREG LFREG
+%token <lval> LBREG LLREG LSREG LFREG LMREG LXREG
%token <dval> LFCONST
%token <sval> LSCONST LSP
%token <sym> LNAME LLAB LVAR
@@ -30,7 +30,7 @@
%type <con2> con2
%type <gen> mem imm imm2 reg nam rel rem rim rom omem nmem
%type <gen2> nonnon nonrel nonrem rimnon rimrem remrim
-%type <gen2> spec1 spec2 spec3 spec4 spec5 spec6 spec7 spec8
+%type <gen2> spec1 spec2 spec3 spec4 spec5 spec6 spec7 spec8 spec9 spec10
%%
prog:
| prog line
@@ -79,6 +79,8 @@
| LTYPEM spec6 { outcode($1, &$2); }
| LTYPEI spec7 { outcode($1, &$2); }
| LTYPEG spec8 { outcode($1, &$2); }
+| LTYPEXC spec9 { outcode($1, &$2); }
+| LTYPEX spec10 { outcode($1, &$2); }
nonnon:
{
@@ -237,7 +239,24 @@
$$.from.scale = $3;
$$.to = $5;
}
+spec9: /* CMPPS/CMPPD: predicate goes in to.offset, the field 8l's Zm_r_i_xm emits as the imm8 */
+ reg ',' rem ',' con
+ {
+ $$.from = $1;
+ $$.to = $3;
+ $$.to.offset = $5;
+ }
+spec10: /* shufl */
+ imm ',' rem ',' reg
+ {
+ $$.from = $3;
+ $$.to = $5;
+ if($1.type != D_CONST)
+ yyerror("illegal constant");
+ $$.to.offset = $1.offset;
+ }
+
rem:
reg
| mem
@@ -301,6 +320,11 @@
$$ = nullgen;
$$.type = $1;
}
+| LMREG
+ {
+ $$ = nullgen;
+ $$.type = $1;
+ }
| LSP
{
$$ = nullgen;
@@ -307,6 +331,11 @@
$$.type = D_SP;
}
| LSREG
+ {
+ $$ = nullgen;
+ $$.type = $1;
+ }
+| LXREG
{
$$ = nullgen;
$$.type = $1;
--- a/sys/src/cmd/8a/lex.c
+++ b/sys/src/cmd/8a/lex.c
@@ -192,6 +192,24 @@
"F6", LFREG, D_F0+6,
"F7", LFREG, D_F0+7,
+ "M0", LMREG, D_M0+0,
+ "M1", LMREG, D_M0+1,
+ "M2", LMREG, D_M0+2,
+ "M3", LMREG, D_M0+3,
+ "M4", LMREG, D_M0+4,
+ "M5", LMREG, D_M0+5,
+ "M6", LMREG, D_M0+6,
+ "M7", LMREG, D_M0+7,
+
+ "X0", LXREG, D_X0+0,
+ "X1", LXREG, D_X0+1,
+ "X2", LXREG, D_X0+2,
+ "X3", LXREG, D_X0+3,
+ "X4", LXREG, D_X0+4,
+ "X5", LXREG, D_X0+5,
+ "X6", LXREG, D_X0+6,
+ "X7", LXREG, D_X0+7,
+
"CS", LSREG, D_CS,
"SS", LSREG, D_SS,
"DS", LSREG, D_DS,
@@ -277,6 +295,7 @@
"CMPXCHGB", LTYPE3, ACMPXCHGB,
"CMPXCHGL", LTYPE3, ACMPXCHGL,
"CMPXCHGW", LTYPE3, ACMPXCHGW,
+ "CPUID", LTYPE0, ACPUID,
"DAA", LTYPE0, ADAA,
"DAS", LTYPE0, ADAS,
"DATA", LTYPED, ADATA,
@@ -638,6 +657,188 @@
"FXTRACT", LTYPE0, AFXTRACT,
"FYL2X", LTYPE0, AFYL2X,
"FYL2XP1", LTYPE0, AFYL2XP1,
+
+ "ADDPD", LTYPE3, AADDPD,
+ "ADDPS", LTYPE3, AADDPS,
+ "ADDSD", LTYPE3, AADDSD,
+ "ADDSS", LTYPE3, AADDSS,
+ "ANDNPD", LTYPE3, AANDNPD,
+ "ANDNPS", LTYPE3, AANDNPS,
+ "ANDPD", LTYPE3, AANDPD,
+ "ANDPS", LTYPE3, AANDPS,
+ "CMPPD", LTYPEXC,ACMPPD,
+ "CMPPS", LTYPEXC,ACMPPS,
+ "CMPSD", LTYPEXC,ACMPSD,
+ "CMPSS", LTYPEXC,ACMPSS,
+ "COMISD", LTYPE3, ACOMISD,
+ "COMISS", LTYPE3, ACOMISS,
+ "CVTPL2PD", LTYPE3, ACVTPL2PD,
+ "CVTPL2PS", LTYPE3, ACVTPL2PS,
+ "CVTPD2PL", LTYPE3, ACVTPD2PL,
+ "CVTPD2PS", LTYPE3, ACVTPD2PS,
+ "CVTPS2PL", LTYPE3, ACVTPS2PL,
+ "PF2IW", LTYPE3, APF2IW,
+ "PF2IL", LTYPE3, APF2IL,
+ "PF2ID", LTYPE3, APF2IL, /* syn */
+ "PI2FL", LTYPE3, API2FL,
+ "PI2FD", LTYPE3, API2FL, /* syn */
+ "PI2FW", LTYPE3, API2FW,
+ "CVTPS2PD", LTYPE3, ACVTPS2PD,
+ "CVTSD2SL", LTYPE3, ACVTSD2SL,
+ "CVTSD2SS", LTYPE3, ACVTSD2SS,
+ "CVTSL2SD", LTYPE3, ACVTSL2SD,
+ "CVTSL2SS", LTYPE3, ACVTSL2SS,
+ "CVTSS2SD", LTYPE3, ACVTSS2SD,
+ "CVTSS2SL", LTYPE3, ACVTSS2SL,
+ "CVTTPD2PL", LTYPE3, ACVTTPD2PL,
+ "CVTTPS2PL", LTYPE3, ACVTTPS2PL,
+ "CVTTSD2SL", LTYPE3, ACVTTSD2SL,
+ "CVTTSS2SL", LTYPE3, ACVTTSS2SL,
+ "DIVPD", LTYPE3, ADIVPD,
+ "DIVPS", LTYPE3, ADIVPS,
+ "DIVSD", LTYPE3, ADIVSD,
+ "DIVSS", LTYPE3, ADIVSS,
+ "FXRSTOR", LTYPE2, AFXRSTOR,
+ "FXSAVE", LTYPE1, AFXSAVE,
+ "LDMXCSR", LTYPE2, ALDMXCSR,
+ "MASKMOVOU", LTYPE3, AMASKMOVOU,
+ "MASKMOVDQU", LTYPE3, AMASKMOVOU, /* syn */
+ "MAXPD", LTYPE3, AMAXPD,
+ "MAXPS", LTYPE3, AMAXPS,
+ "MAXSD", LTYPE3, AMAXSD,
+ "MAXSS", LTYPE3, AMAXSS,
+ "MINPD", LTYPE3, AMINPD,
+ "MINPS", LTYPE3, AMINPS,
+ "MINSD", LTYPE3, AMINSD,
+ "MINSS", LTYPE3, AMINSS,
+ "MOVAPD", LTYPE3, AMOVAPD,
+ "MOVAPS", LTYPE3, AMOVAPS,
+ "MOVO", LTYPE3, AMOVO,
+ "MOVOA", LTYPE3, AMOVO, /* syn */
+ "MOVOU", LTYPE3, AMOVOU,
+ "MOVHLPS", LTYPE3, AMOVHLPS,
+ "MOVHPD", LTYPE3, AMOVHPD,
+ "MOVHPS", LTYPE3, AMOVHPS,
+ "MOVLHPS", LTYPE3, AMOVLHPS,
+ "MOVLPD", LTYPE3, AMOVLPD,
+ "MOVLPS", LTYPE3, AMOVLPS,
+ "MOVMSKPD", LTYPE3, AMOVMSKPD,
+ "MOVMSKPS", LTYPE3, AMOVMSKPS,
+ "MOVNTO", LTYPE3, AMOVNTO,
+ "MOVNTDQ", LTYPE3, AMOVNTO, /* syn */
+ "MOVNTPD", LTYPE3, AMOVNTPD,
+ "MOVNTPS", LTYPE3, AMOVNTPS,
+ "MOVNTQ", LTYPE3, AMOVNTQ,
+ "MOVQOZX", LTYPE3, AMOVQOZX,
+ "MOVSD", LTYPE3, AMOVSD,
+ "MOVSS", LTYPE3, AMOVSS,
+ "MOVUPD", LTYPE3, AMOVUPD,
+ "MOVUPS", LTYPE3, AMOVUPS,
+ "MULPD", LTYPE3, AMULPD,
+ "MULPS", LTYPE3, AMULPS,
+ "MULSD", LTYPE3, AMULSD,
+ "MULSS", LTYPE3, AMULSS,
+ "ORPD", LTYPE3, AORPD,
+ "ORPS", LTYPE3, AORPS,
+ "PACKSSLW", LTYPE3, APACKSSLW,
+ "PACKSSWB", LTYPE3, APACKSSWB,
+ "PACKUSWB", LTYPE3, APACKUSWB,
+ "PADDB", LTYPE3, APADDB,
+ "PADDL", LTYPE3, APADDL,
+ "PADDQ", LTYPE3, APADDQ,
+ "PADDSB", LTYPE3, APADDSB,
+ "PADDSW", LTYPE3, APADDSW,
+ "PADDUSB", LTYPE3, APADDUSB,
+ "PADDUSW", LTYPE3, APADDUSW,
+ "PADDW", LTYPE3, APADDW,
+ "PAND", LTYPE3, APAND,
+ "PANDB", LTYPE3, APANDB,
+ "PANDL", LTYPE3, APANDL,
+ "PANDSB", LTYPE3, APANDSB,
+ "PANDSW", LTYPE3, APANDSW,
+ "PANDUSB", LTYPE3, APANDUSB,
+ "PANDUSW", LTYPE3, APANDUSW,
+ "PANDW", LTYPE3, APANDW,
+ "PANDN", LTYPE3, APANDN,
+ "PAVGB", LTYPE3, APAVGB,
+ "PAVGW", LTYPE3, APAVGW,
+ "PCMPEQB", LTYPE3, APCMPEQB,
+ "PCMPEQL", LTYPE3, APCMPEQL,
+ "PCMPEQW", LTYPE3, APCMPEQW,
+ "PCMPGTB", LTYPE3, APCMPGTB,
+ "PCMPGTL", LTYPE3, APCMPGTL,
+ "PCMPGTW", LTYPE3, APCMPGTW,
+ "PEXTRW", LTYPEX, APEXTRW,
+ "PINSRW", LTYPEX, APINSRW,
+ "PMADDWL", LTYPE3, APMADDWL,
+ "PMAXSW", LTYPE3, APMAXSW,
+ "PMAXUB", LTYPE3, APMAXUB,
+ "PMINSW", LTYPE3, APMINSW,
+ "PMINUB", LTYPE3, APMINUB,
+ "PMOVMSKB", LTYPE3, APMOVMSKB,
+ "PMULHRW", LTYPE3, APMULHRW,
+ "PMULHUW", LTYPE3, APMULHUW,
+ "PMULHW", LTYPE3, APMULHW,
+ "PMULLW", LTYPE3, APMULLW,
+ "PMULULQ", LTYPE3, APMULULQ,
+ "POR", LTYPE3, APOR,
+ "PSADBW", LTYPE3, APSADBW,
+ "PSHUFHW", LTYPEX, APSHUFHW,
+ "PSHUFL", LTYPEX, APSHUFL,
+ "PSHUFLW", LTYPEX, APSHUFLW,
+ "PSHUFW", LTYPEX, APSHUFW,
+ "PSLLO", LTYPE3, APSLLO,
+ "PSLLDQ", LTYPE3, APSLLO, /* syn */
+ "PSLLL", LTYPE3, APSLLL,
+ "PSLLQ", LTYPE3, APSLLQ,
+ "PSLLW", LTYPE3, APSLLW,
+ "PSRAL", LTYPE3, APSRAL,
+ "PSRAW", LTYPE3, APSRAW,
+ "PSRLO", LTYPE3, APSRLO,
+ "PSRLDQ", LTYPE3, APSRLO, /* syn */
+ "PSRLL", LTYPE3, APSRLL,
+ "PSRLQ", LTYPE3, APSRLQ,
+ "PSRLW", LTYPE3, APSRLW,
+ "PSUBB", LTYPE3, APSUBB,
+ "PSUBL", LTYPE3, APSUBL,
+ "PSUBQ", LTYPE3, APSUBQ,
+ "PSUBSB", LTYPE3, APSUBSB,
+ "PSUBSW", LTYPE3, APSUBSW,
+ "PSUBUSB", LTYPE3, APSUBUSB,
+ "PSUBUSW", LTYPE3, APSUBUSW,
+ "PSUBW", LTYPE3, APSUBW,
+ "PUNPCKHBW", LTYPE3, APUNPCKHBW,
+ "PUNPCKHLQ", LTYPE3, APUNPCKHLQ,
+ "PUNPCKHQDQ", LTYPE3, APUNPCKHQDQ,
+ "PUNPCKHWL", LTYPE3, APUNPCKHWL,
+ "PUNPCKLBW", LTYPE3, APUNPCKLBW,
+ "PUNPCKLLQ", LTYPE3, APUNPCKLLQ,
+ "PUNPCKLQDQ", LTYPE3, APUNPCKLQDQ,
+ "PUNPCKLWL", LTYPE3, APUNPCKLWL,
+ "PXOR", LTYPE3, APXOR,
+ "RCPPS", LTYPE3, ARCPPS,
+ "RCPSS", LTYPE3, ARCPSS,
+ "RSQRTPS", LTYPE3, ARSQRTPS,
+ "RSQRTSS", LTYPE3, ARSQRTSS,
+ "SHUFPD", LTYPEX, ASHUFPD,
+ "SHUFPS", LTYPEX, ASHUFPS,
+ "SQRTPD", LTYPE3, ASQRTPD,
+ "SQRTPS", LTYPE3, ASQRTPS,
+ "SQRTSD", LTYPE3, ASQRTSD,
+ "SQRTSS", LTYPE3, ASQRTSS,
+ "STMXCSR", LTYPE1, ASTMXCSR,
+ "SUBPD", LTYPE3, ASUBPD,
+ "SUBPS", LTYPE3, ASUBPS,
+ "SUBSD", LTYPE3, ASUBSD,
+ "SUBSS", LTYPE3, ASUBSS,
+ "UCOMISD", LTYPE3, AUCOMISD,
+ "UCOMISS", LTYPE3, AUCOMISS,
+ "UNPCKHPD", LTYPE3, AUNPCKHPD,
+ "UNPCKHPS", LTYPE3, AUNPCKHPS,
+ "UNPCKLPD", LTYPE3, AUNPCKLPD,
+ "UNPCKLPS", LTYPE3, AUNPCKLPS,
+ "XORPD", LTYPE3, AXORPD,
+ "XORPS", LTYPE3, AXORPS,
0
};
--- a/sys/src/cmd/8c/8.out.h
+++ b/sys/src/cmd/8c/8.out.h
@@ -361,6 +361,7 @@
ACMPXCHGB,
ACMPXCHGL,
ACMPXCHGW,
+ ACPUID,
/* conditional move */
ACMOVLCC,
@@ -405,6 +406,185 @@
AFCMOVNU,
AFCMOVUN,
+ /* media */
+ AADDPD,
+ AADDPS,
+ AADDSD,
+ AADDSS,
+ AANDNPD,
+ AANDNPS,
+ AANDPD,
+ AANDPS,
+ ACMPPD,
+ ACMPPS,
+ ACMPSD,
+ ACMPSS,
+ ACOMISD,
+ ACOMISS,
+ ACVTPD2PL,
+ ACVTPD2PS,
+ ACVTPL2PD,
+ ACVTPL2PS,
+ ACVTPS2PD,
+ ACVTPS2PL,
+ ACVTSD2SL,
+ ACVTSD2SS,
+ ACVTSL2SD,
+ ACVTSL2SS,
+ ACVTSS2SD,
+ ACVTSS2SL,
+ ACVTTPD2PL,
+ ACVTTPS2PL,
+ ACVTTSD2SL,
+ ACVTTSS2SL,
+ ADIVPD,
+ ADIVPS,
+ ADIVSD,
+ ADIVSS,
+ AFXRSTOR,
+ AFXSAVE,
+ ALDMXCSR,
+ AMASKMOVOU,
+ AMASKMOVQ,
+ AMAXPD,
+ AMAXPS,
+ AMAXSD,
+ AMAXSS,
+ AMINPD,
+ AMINPS,
+ AMINSD,
+ AMINSS,
+ AMOVAPD,
+ AMOVAPS,
+ AMOVOU,
+ AMOVHLPS,
+ AMOVHPD,
+ AMOVHPS,
+ AMOVLHPS,
+ AMOVLPD,
+ AMOVLPS,
+ AMOVMSKPD,
+ AMOVMSKPS,
+ AMOVNTO,
+ AMOVNTPD,
+ AMOVNTPS,
+ AMOVNTQ,
+ AMOVO,
+ AMOVQOZX,
+ AMOVSD,
+ AMOVSS,
+ AMOVUPD,
+ AMOVUPS,
+ AMULPD,
+ AMULPS,
+ AMULSD,
+ AMULSS,
+ AORPD,
+ AORPS,
+ APACKSSLW,
+ APACKSSWB,
+ APACKUSWB,
+ APADDB,
+ APADDL,
+ APADDQ,
+ APADDSB,
+ APADDSW,
+ APADDUSB,
+ APADDUSW,
+ APADDW,
+ APANDB,
+ APANDL,
+ APANDSB,
+ APANDSW,
+ APANDUSB,
+ APANDUSW,
+ APANDW,
+ APAND,
+ APANDN,
+ APAVGB,
+ APAVGW,
+ APCMPEQB,
+ APCMPEQL,
+ APCMPEQW,
+ APCMPGTB,
+ APCMPGTL,
+ APCMPGTW,
+ APEXTRW,
+ APINSRW,
+ APMADDWL,
+ APMAXSW,
+ APMAXUB,
+ APMINSW,
+ APMINUB,
+ APMOVMSKB,
+ APMULHRW,
+ APMULHUW,
+ APMULHW,
+ APMULLW,
+ APMULULQ,
+ APOR,
+ APSADBW,
+ APSHUFHW,
+ APSHUFL,
+ APSHUFLW,
+ APSHUFW,
+ APSLLO,
+ APSLLL,
+ APSLLQ,
+ APSLLW,
+ APSRAL,
+ APSRAW,
+ APSRLO,
+ APSRLL,
+ APSRLQ,
+ APSRLW,
+ APSUBB,
+ APSUBL,
+ APSUBQ,
+ APSUBSB,
+ APSUBSW,
+ APSUBUSB,
+ APSUBUSW,
+ APSUBW,
+ APSWAPL,
+ APUNPCKHBW,
+ APUNPCKHLQ,
+ APUNPCKHQDQ,
+ APUNPCKHWL,
+ APUNPCKLBW,
+ APUNPCKLLQ,
+ APUNPCKLQDQ,
+ APUNPCKLWL,
+ APXOR,
+ ARCPPS,
+ ARCPSS,
+ ARSQRTPS,
+ ARSQRTSS,
+ ASHUFPD,
+ ASHUFPS,
+ ASQRTPD,
+ ASQRTPS,
+ ASQRTSD,
+ ASQRTSS,
+ ASTMXCSR,
+ ASUBPD,
+ ASUBPS,
+ ASUBSD,
+ ASUBSS,
+ AUCOMISD,
+ AUCOMISS,
+ AUNPCKHPD,
+ AUNPCKHPS,
+ AUNPCKLPD,
+ AUNPCKLPS,
+ AXORPD,
+ AXORPS,
+
+ APF2IW,
+ APF2IL,
+ API2FW,
+ API2FL,
+
/* add new operations here. nowhere else. here. */
ALAST
};
@@ -470,6 +650,10 @@
D_CONST2 = D_INDIR+D_INDIR,
D_SIZE, /* 8l internal */
+
+ D_M0,
+ D_X0 = D_M0 + 8,
+ D_XNONE = D_X0 + 8,
T_TYPE = 1<<0,
T_INDEX = 1<<1,
--- a/sys/src/cmd/8c/enam.c
+++ b/sys/src/cmd/8c/enam.c
@@ -340,6 +340,7 @@
"CMPXCHGB",
"CMPXCHGL",
"CMPXCHGW",
+ "CPUID",
"CMOVLCC",
"CMOVLCS",
"CMOVLEQ",
@@ -380,5 +381,181 @@
"FCMOVNE",
"FCMOVNU",
"FCMOVUN",
+ "ADDPD",
+ "ADDPS",
+ "ADDSD",
+ "ADDSS",
+ "ANDNPD",
+ "ANDNPS",
+ "ANDPD",
+ "ANDPS",
+ "CMPPD",
+ "CMPPS",
+ "CMPSD",
+ "CMPSS",
+ "COMISD",
+ "COMISS",
+ "CVTPD2PL",
+ "CVTPD2PS",
+ "CVTPL2PD",
+ "CVTPL2PS",
+ "CVTPS2PD",
+ "CVTPS2PL",
+ "CVTSD2SL",
+ "CVTSD2SS",
+ "CVTSL2SD",
+ "CVTSL2SS",
+ "CVTSS2SD",
+ "CVTSS2SL",
+ "CVTTPD2PL",
+ "CVTTPS2PL",
+ "CVTTSD2SL",
+ "CVTTSS2SL",
+ "DIVPD",
+ "DIVPS",
+ "DIVSD",
+ "DIVSS",
+ "FXRSTOR",
+ "FXSAVE",
+ "LDMXCSR",
+ "MASKMOVOU",
+ "MASKMOVQ",
+ "MAXPD",
+ "MAXPS",
+ "MAXSD",
+ "MAXSS",
+ "MINPD",
+ "MINPS",
+ "MINSD",
+ "MINSS",
+ "MOVAPD",
+ "MOVAPS",
+ "MOVOU",
+ "MOVHLPS",
+ "MOVHPD",
+ "MOVHPS",
+ "MOVLHPS",
+ "MOVLPD",
+ "MOVLPS",
+ "MOVMSKPD",
+ "MOVMSKPS",
+ "MOVNTO",
+ "MOVNTPD",
+ "MOVNTPS",
+ "MOVNTQ",
+ "MOVO",
+ "MOVQOZX",
+ "MOVSD",
+ "MOVSS",
+ "MOVUPD",
+ "MOVUPS",
+ "MULPD",
+ "MULPS",
+ "MULSD",
+ "MULSS",
+ "ORPD",
+ "ORPS",
+ "PACKSSLW",
+ "PACKSSWB",
+ "PACKUSWB",
+ "PADDB",
+ "PADDL",
+ "PADDQ",
+ "PADDSB",
+ "PADDSW",
+ "PADDUSB",
+ "PADDUSW",
+ "PADDW",
+ "PANDB",
+ "PANDL",
+ "PANDSB",
+ "PANDSW",
+ "PANDUSB",
+ "PANDUSW",
+ "PANDW",
+ "PAND",
+ "PANDN",
+ "PAVGB",
+ "PAVGW",
+ "PCMPEQB",
+ "PCMPEQL",
+ "PCMPEQW",
+ "PCMPGTB",
+ "PCMPGTL",
+ "PCMPGTW",
+ "PEXTRW",
+ "PINSRW",
+ "PMADDWL",
+ "PMAXSW",
+ "PMAXUB",
+ "PMINSW",
+ "PMINUB",
+ "PMOVMSKB",
+ "PMULHRW",
+ "PMULHUW",
+ "PMULHW",
+ "PMULLW",
+ "PMULULQ",
+ "POR",
+ "PSADBW",
+ "PSHUFHW",
+ "PSHUFL",
+ "PSHUFLW",
+ "PSHUFW",
+ "PSLLO",
+ "PSLLL",
+ "PSLLQ",
+ "PSLLW",
+ "PSRAL",
+ "PSRAW",
+ "PSRLO",
+ "PSRLL",
+ "PSRLQ",
+ "PSRLW",
+ "PSUBB",
+ "PSUBL",
+ "PSUBQ",
+ "PSUBSB",
+ "PSUBSW",
+ "PSUBUSB",
+ "PSUBUSW",
+ "PSUBW",
+ "PSWAPL",
+ "PUNPCKHBW",
+ "PUNPCKHLQ",
+ "PUNPCKHQDQ",
+ "PUNPCKHWL",
+ "PUNPCKLBW",
+ "PUNPCKLLQ",
+ "PUNPCKLQDQ",
+ "PUNPCKLWL",
+ "PXOR",
+ "RCPPS",
+ "RCPSS",
+ "RSQRTPS",
+ "RSQRTSS",
+ "SHUFPD",
+ "SHUFPS",
+ "SQRTPD",
+ "SQRTPS",
+ "SQRTSD",
+ "SQRTSS",
+ "STMXCSR",
+ "SUBPD",
+ "SUBPS",
+ "SUBSD",
+ "SUBSS",
+ "UCOMISD",
+ "UCOMISS",
+ "UNPCKHPD",
+ "UNPCKHPS",
+ "UNPCKLPD",
+ "UNPCKLPS",
+ "XORPD",
+ "XORPS",
+ "PF2IW",
+ "PF2IL",
+ "PI2FW",
+ "PI2FL",
"LAST",
};
--- a/sys/src/cmd/8l/l.h
+++ b/sys/src/cmd/8l/l.h
@@ -90,7 +90,7 @@
short as;
uchar* ytab;
uchar prefix;
- uchar op[10];
+ uchar op[20];
};
enum
@@ -142,6 +142,8 @@
Ycr0, Ycr1, Ycr2, Ycr3, Ycr4, Ycr5, Ycr6, Ycr7,
Ydr0, Ydr1, Ydr2, Ydr3, Ydr4, Ydr5, Ydr6, Ydr7,
Ytr0, Ytr1, Ytr2, Ytr3, Ytr4, Ytr5, Ytr6, Ytr7,
+ Ymr, Ymm,
+ Yxr, Yxm,
Ymax,
Zxxx = 0,
@@ -153,6 +155,7 @@
Zib_,
Zib_rp,
Zibo_m,
+ Zibo_m_xm,
Zil_,
Zil_rp,
Zilo_m,
@@ -160,10 +163,16 @@
Zloop,
Zm_o,
Zm_r,
+ Zm_r_xm,
+ Zm_r_i_xm,
+ Zm_r_3d,
+ Zibm_r, /* mmx1,mmx2/mem64,imm8 */
Zaut_r,
Zo_m,
Zpseudo,
Zr_m,
+ Zr_m_xm,
+ Zr_m_i_xm,
Zrp_,
Z_ib,
Z_il,
@@ -181,6 +190,8 @@
Pm = 0x0f, /* 2byte opcode escape */
Pq = 0xff, /* both escape */
Pb = 0xfe, /* byte operands */
+ Pf2 = 0xf2, /* xmm escape 1 */
+ Pf3 = 0xf3, /* xmm escape 2 */
Roffset = 22, /* no. bits for offset in relocation address */
Rindex = 10, /* no. bits for index in relocation address */
@@ -250,7 +261,7 @@
EXTERN char ycover[Ymax*Ymax];
EXTERN uchar* andptr;
EXTERN uchar and[30];
-EXTERN char reg[D_NONE];
+EXTERN char reg[D_XNONE];
EXTERN Prog* lastp;
EXTERN long lcsize;
EXTERN int nerrors;
@@ -279,6 +290,7 @@
#define UP (&undefp)
extern Optab optab[];
+extern Optab* opindex[];
extern char* anames[];
int Aconv(Fmt*);
--- a/sys/src/cmd/8l/list.c
+++ b/sys/src/cmd/8l/list.c
@@ -61,7 +61,7 @@
a = va_arg(fp->args, Adr*);
i = a->type;
- if(i >= D_INDIR) {
+ if(i >= D_INDIR && i < D_M0) {
if(a->offset)
snprint(str, sizeof(str), "%ld(%R)", a->offset, i-D_INDIR);
else
@@ -208,6 +208,24 @@
"TR7",
"NONE", /* [D_NONE] */
+
+[D_M0] "M0", /* [D_M0] */
+ "M1",
+ "M2",
+ "M3",
+ "M4",
+ "M5",
+ "M6",
+ "M7",
+
+[D_X0] "X0", /* [D_X0] */
+ "X1",
+ "X2",
+ "X3",
+ "X4",
+ "X5",
+ "X6",
+ "X7",
};
int
@@ -217,7 +235,7 @@
int r;
r = va_arg(fp->args, int);
- if(r >= D_AL && r <= D_NONE)
+ if((r >= D_AL && r <= D_NONE) || (r >= D_M0 && r <= D_X0+7))
snprint(str, sizeof(str), "%s", regstr[r-D_AL]);
else
snprint(str, sizeof(str), "gok(%d)", r);
--- a/sys/src/cmd/8l/obj.c
+++ b/sys/src/cmd/8l/obj.c
@@ -195,11 +195,14 @@
Bprint(&bso, "HEADER = -H0x%ld -T0x%lux -D0x%lux -R0x%lux\n",
HEADTYPE, INITTEXT, INITDAT, INITRND);
Bflush(&bso);
- for(i=1; optab[i].as; i++)
- if(i != optab[i].as) {
- diag("phase error in optab: %d", i);
+ for(i=1; optab[i].as; i++) {
+ c = optab[i].as;
+ if(opindex[c] != nil) {
+ diag("phase error in optab: %d (%A)", i, c);
errorexit();
}
+ opindex[c] = &optab[i];
+ }
for(i=0; i<Ymax; i++)
ycover[i*Ymax + i] = 1;
@@ -240,7 +243,13 @@
ycover[Yrl*Ymax + Yml] = 1;
ycover[Ym*Ymax + Yml] = 1;
- for(i=0; i<D_NONE; i++) {
+ ycover[Ym*Ymax + Ymm] = 1;
+ ycover[Ymr*Ymax + Ymm] = 1;
+
+ ycover[Ym*Ymax + Yxm] = 1;
+ ycover[Yxr*Ymax + Yxm] = 1;
+
+ for(i=0; i<D_XNONE; i++) {
reg[i] = -1;
if(i >= D_AL && i <= D_BH)
reg[i] = (i-D_AL) & 7;
@@ -248,6 +257,10 @@
reg[i] = (i-D_AX) & 7;
if(i >= D_F0 && i <= D_F0+7)
reg[i] = (i-D_F0) & 7;
+ if(i >= D_M0 && i <= D_M0+7)
+ reg[i] = (i-D_M0) & 7;
+ if(i >= D_X0 && i <= D_X0+7)
+ reg[i] = (i-D_X0) & 7;
}
zprg.link = P;
@@ -988,6 +1001,13 @@
case AFDIVRF:
case AFCOMF:
case AFCOMFP:
+ case AMOVSS:
+ case AADDSS:
+ case ASUBSS:
+ case AMULSS:
+ case ADIVSS:
+ case ACOMISS:
+ case AUCOMISS:
if(skip)
goto casdef;
if(p->from.type == D_FCONST) {
@@ -1026,6 +1046,13 @@
case AFDIVRD:
case AFCOMD:
case AFCOMDP:
+ case AMOVSD:
+ case AADDSD:
+ case ASUBSD:
+ case AMULSD:
+ case ADIVSD:
+ case ACOMISD:
+ case AUCOMISD:
if(skip)
goto casdef;
if(p->from.type == D_FCONST) {
--- a/sys/src/cmd/8l/optab.c
+++ b/sys/src/cmd/8l/optab.c
@@ -15,8 +15,10 @@
Ynone, Ynone, Zpseudo,1,
Ynone, Yml, Zpseudo,1,
Ynone, Yrf, Zpseudo,1,
+ Ynone, Yxr, Zpseudo,1,
Yml, Ynone, Zpseudo,1,
Yrf, Ynone, Zpseudo,1,
+ Yxr, Ynone, Zpseudo,1,
0
};
uchar yxorb[] =
@@ -120,6 +122,10 @@
// Yi0, Yml, Zibo_m, 2, // shorter but slower AND $0,dst
Yi32, Yrl, Zil_rp, 1,
Yi32, Yml, Zilo_m, 2,
+ Yml, Ymr, Zm_r_xm, 1, // MMX MOVD
+ Ymr, Yml, Zr_m_xm, 1, // MMX MOVD
+ Yml, Yxr, Zm_r_xm, 2, // XMM MOVD (32 bit)
+ Yxr, Yml, Zr_m_xm, 2, // XMM MOVD (32 bit)
Yiauto, Yrl, Zaut_r, 2,
0
};
@@ -306,6 +312,134 @@
Ym, Ynone, Zm_o, 2,
0
};
+uchar ymm[] =
+{
+ Ymm, Ymr, Zm_r_xm, 1,
+ Yxm, Yxr, Zm_r_xm, 2,
+ 0
+};
+uchar yxm[] =
+{
+ Yxm, Yxr, Zm_r_xm, 1,
+ 0
+};
+uchar yxcvm1[] =
+{
+ Yxm, Yxr, Zm_r_xm, 2,
+ 0
+};
+uchar yxcvm2[] =
+{
+ Yxm, Yxr, Zm_r_xm, 2,
+ 0
+};
+uchar yxmq[] =
+{
+ Yxm, Yxr, Zm_r_xm, 2,
+ 0
+};
+uchar yxr[] =
+{
+ Yxr, Yxr, Zm_r_xm, 1,
+ 0
+};
+uchar yxr_ml[] =
+{
+ Yxr, Yml, Zr_m_xm, 1,
+ 0
+};
+uchar ymr[] =
+{
+ Ymr, Ymr, Zm_r, 1,
+ 0
+};
+uchar ymr_ml[] =
+{
+ Ymr, Yml, Zr_m_xm, 1,
+ 0
+};
+uchar yxcmp[] =
+{
+ Yxm, Yxr, Zm_r_xm, 1,
+ 0
+};
+uchar yxcmpi[] =
+{
+ Yxm, Yxr, Zm_r_i_xm, 2,
+ 0
+};
+uchar yxmov[] =
+{
+ Yxm, Yxr, Zm_r_xm, 1,
+ Yxr, Yxm, Zr_m_xm, 1,
+ 0
+};
+uchar yxcvfl[] =
+{
+ Yxm, Yrl, Zm_r_xm, 1,
+ 0
+};
+uchar yxcvlf[] =
+{
+ Yml, Yxr, Zm_r_xm, 1,
+ 0
+};
+uchar yps[] =
+{
+ Ymm, Ymr, Zm_r_xm, 1,
+ Yi8, Ymr, Zibo_m_xm, 2,
+ Yxm, Yxr, Zm_r_xm, 2,
+ Yi8, Yxr, Zibo_m_xm, 3,
+ 0
+};
+uchar yxrrl[] =
+{
+ Yxr, Yrl, Zm_r, 1,
+ 0
+};
+uchar ymfp[] =
+{
+ Ymm, Ymr, Zm_r_3d, 1,
+ 0,
+};
+uchar ymrxr[] =
+{
+ Ymr, Yxr, Zm_r, 1,
+ Yxm, Yxr, Zm_r_xm, 1,
+ 0
+};
+uchar ymshuf[] =
+{
+ Ymm, Ymr, Zibm_r, 1,
+ 0
+};
+uchar yxshuf[] =
+{
+ Yxm, Yxr, Zibm_r, 1,
+ 0
+};
+uchar yextrw[] =
+{
+ Yxr, Yrl, Zibm_r, 1,
+ 0
+};
+uchar ypsdq[] =
+{
+ Yi8, Yxr, Zibo_m, 2,
+ 0
+};
+uchar ymskb[] =
+{
+ Yxr, Yrl, Zm_r_xm, 2,
+ Ymr, Yrl, Zm_r_xm, 1,
+ 0
+};
+uchar yxaes[] =
+{
+ Yxm, Yxr, Zm_r_xm, 2,
+ Yxm, Yxr, Zm_r_i_xm, 2,
+ 0
+};
Optab optab[] =
/* as, ytab, andproto, opcode */
@@ -320,10 +454,18 @@
{ AADCW, yxorl, Pe, 0x83,(02),0x15,0x81,(02),0x11,0x13 },
{ AADDB, yxorb, Px, 0x04,0x80,(00),0x00,0x02 },
{ AADDL, yaddl, Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
+ { AADDPD, yxm, Pq, 0x58 },
+ { AADDPS, yxm, Pm, 0x58 },
+ { AADDSD, yxm, Pf2, 0x58 },
+ { AADDSS, yxm, Pf3, 0x58 },
{ AADDW, yaddl, Pe, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
{ AADJSP },
{ AANDB, yxorb, Pb, 0x24,0x80,(04),0x20,0x22 },
{ AANDL, yxorl, Px, 0x83,(04),0x25,0x81,(04),0x21,0x23 },
+ { AANDNPD, yxm, Pq, 0x55 },
+ { AANDNPS, yxm, Pm, 0x55 },
+ { AANDPD, yxm, Pq, 0x54 },
+ { AANDPS, yxm, Pm, 0x54 }, /* 0F 54; Pq made this row identical to ANDPD */
{ AANDW, yxorl, Pe, 0x83,(04),0x25,0x81,(04),0x21,0x23 },
{ AARPL, yrl_ml, Px, 0x63 },
{ ABOUNDL, yrl_m, Px, 0x62 },
@@ -349,9 +491,32 @@
{ ACMC, ynone, Px, 0xf5 },
{ ACMPB, ycmpb, Pb, 0x3c,0x80,(07),0x38,0x3a },
{ ACMPL, ycmpl, Px, 0x83,(07),0x3d,0x81,(07),0x39,0x3b },
+ { ACMPPD, yxcmpi, Px, Pe,0xc2 },
+ { ACMPPS, yxcmpi, Pm, 0xc2,0 },
{ ACMPW, ycmpl, Pe, 0x83,(07),0x3d,0x81,(07),0x39,0x3b },
+ { ACOMISD, yxcmp, Pe, 0x2f },
+ { ACOMISS, yxcmp, Pm, 0x2f },
+ { ACPUID, ynone, Pm, 0xa2 },
+ { ACVTPL2PD, yxcvm2, Px, Pf3,0xe6 },
+ { ACVTPL2PS, yxcvm2, Pm, 0x5b },
+ { ACVTPD2PL, yxcvm1, Px, Pf2,0xe6 },
+ { ACVTPD2PS, yxm, Pe, 0x5a },
+ { ACVTPS2PL, yxcvm1, Px, Pe,0x5b },
+ { ACVTPS2PD, yxm, Pm, 0x5a },
+ { ACVTSD2SL, yxcvfl, Pf2, 0x2d },
+ { ACVTSD2SS, yxm, Pf2, 0x5a },
+ { ACVTSL2SD, yxcvlf, Pf2, 0x2a },
+ { ACVTSL2SS, yxcvlf, Pf3, 0x2a },
+ { ACVTSS2SD, yxm, Pf3, 0x5a },
+ { ACVTSS2SL, yxcvfl, Pf3, 0x2d },
+ { ACVTTPD2PL, yxcvm1, Px, Pe,0xe6 },
+ { ACVTTPS2PL, yxcvm1, Px, Pf3,0x5b },
+ { ACVTTSD2SL, yxcvfl, Pf2, 0x2c },
+ { ACVTTSS2SL, yxcvfl, Pf3, 0x2c },
{ ACMPSB, ynone, Pb, 0xa6 },
+ { ACMPSD, yxcmpi, Px, Pf2,0xc2 },
{ ACMPSL, ynone, Px, 0xa7 },
+ { ACMPSS, yxcmpi, Px, Pf3,0xc2 },
{ ACMPSW, ynone, Pe, 0xa7 },
{ ADAA, ynone, Px, 0x27 },
{ ADAS, ynone, Px, 0x2f },
@@ -361,8 +526,14 @@
{ ADECW, yincl, Pe, 0x48,0xff,(01) },
{ ADIVB, ydivb, Pb, 0xf6,(06) },
{ ADIVL, ydivl, Px, 0xf7,(06) },
+ { ADIVPD, yxm, Pe, 0x5e },
+ { ADIVPS, yxm, Pm, 0x5e },
+ { ADIVSD, yxm, Pf2, 0x5e },
+ { ADIVSS, yxm, Pf3, 0x5e },
{ ADIVW, ydivl, Pe, 0xf7,(06) },
{ AENTER }, /* botch */
+ { AFXRSTOR, ysvrs, Pm, 0xae,(01),0xae,(01) },
+ { AFXSAVE, ysvrs, Pm, 0xae,(00),0xae,(00) },
{ AGLOBL },
{ AGOK },
{ AHISTORY },
@@ -407,6 +578,7 @@
{ ALAHF, ynone, Px, 0x9f },
{ ALARL, yml_rl, Pm, 0x02 },
{ ALARW, yml_rl, Pq, 0x02 },
+ { ALDMXCSR, ysvrs, Pm, 0xae,(02),0xae,(02) },
{ ALEAL, ym_rl, Px, 0x8d },
{ ALEAW, ym_rl, Pe, 0x8d },
{ ALEAVEL, ynone, Px, 0xc9 },
@@ -421,8 +593,20 @@
{ ALOOPNE, yloop, Px, 0xe0 },
{ ALSLL, yml_rl, Pm, 0x03 },
{ ALSLW, yml_rl, Pq, 0x03 },
+ { AMASKMOVOU, yxr, Pe, 0xf7 },
+ { AMASKMOVQ, ymr, Pm, 0xf7 },
+ { AMAXPD, yxm, Pe, 0x5f },
+ { AMAXPS, yxm, Pm, 0x5f },
+ { AMAXSD, yxm, Pf2, 0x5f },
+ { AMAXSS, yxm, Pf3, 0x5f },
+ { AMINPD, yxm, Pe, 0x5d },
+ { AMINPS, yxm, Pm, 0x5d },
+ { AMINSD, yxm, Pf2, 0x5d },
+ { AMINSS, yxm, Pf3, 0x5d },
+ { AMOVAPD, yxmov, Pe, 0x28,0x29 },
+ { AMOVAPS, yxmov, Pm, 0x28,0x29 },
{ AMOVB, ymovb, Pb, 0x88,0x8a,0xb0,0xc6,(00) },
- { AMOVL, ymovl, Px, 0x89,0x8b,0x31,0x83,(04),0xb8,0xc7,(00) },
+ { AMOVL, ymovl, Px, 0x89,0x8b,0x31,0x83,(04),0xb8,0xc7,(00),0x6e,0x7e,Pe,0x6e,Pe,0x7e },
{ AMOVW, ymovl, Pe, 0x89,0x8b,0x31,0x83,(04),0xb8,0xc7,(00) },
{ AMOVBLSX, ymb_rl, Pm, 0xbe },
{ AMOVBLZX, ymb_rl, Pm, 0xb6 },
@@ -430,11 +614,34 @@
{ AMOVBWZX, ymb_rl, Pq, 0xb6 },
{ AMOVWLSX, yml_rl, Pm, 0xbf },
{ AMOVWLZX, yml_rl, Pm, 0xb7 },
+ { AMOVO, yxmov, Pe, 0x6f,0x7f },
+ { AMOVOU, yxmov, Pf3, 0x6f,0x7f },
+ { AMOVHLPS, yxr, Pm, 0x12 },
+ { AMOVHPD, yxmov, Pe, 0x16,0x17 },
+ { AMOVHPS, yxmov, Pm, 0x16,0x17 },
+ { AMOVLHPS, yxr, Pm, 0x16 },
+ { AMOVLPD, yxmov, Pe, 0x12,0x13 },
+ { AMOVLPS, yxmov, Pm, 0x12,0x13 },
+ { AMOVMSKPD, yxrrl, Pq, 0x50 },
+ { AMOVMSKPS, yxrrl, Pm, 0x50 },
+ { AMOVNTO, yxr_ml, Pe, 0xe7 },
+ { AMOVNTPD, yxr_ml, Pe, 0x2b },
+ { AMOVNTPS, yxr_ml, Pm, 0x2b },
+ { AMOVNTQ, ymr_ml, Pm, 0xe7 },
+ { AMOVQOZX, ymrxr, Pf3, 0xd6,0x7e },
{ AMOVSB, ynone, Pb, 0xa4 },
+ { AMOVSD, yxmov, Pf2, 0x10,0x11 },
{ AMOVSL, ynone, Px, 0xa5 },
+ { AMOVSS, yxmov, Pf3, 0x10,0x11 },
{ AMOVSW, ynone, Pe, 0xa5 },
+ { AMOVUPD, yxmov, Pe, 0x10,0x11 },
+ { AMOVUPS, yxmov, Pm, 0x10,0x11 },
{ AMULB, ydivb, Pb, 0xf6,(04) },
{ AMULL, ydivl, Px, 0xf7,(04) },
+ { AMULPD, yxm, Pe, 0x59 },
+ { AMULPS, yxm, Pm, 0x59 }, /* prefix slot takes a P* escape, not the Ym operand class */
+ { AMULSD, yxm, Pf2, 0x59 },
+ { AMULSS, yxm, Pf3, 0x59 },
{ AMULW, ydivl, Pe, 0xf7,(04) },
{ ANAME },
{ ANEGB, yscond, Px, 0xf6,(03) },
@@ -446,6 +653,8 @@
{ ANOTW, yscond, Pe, 0xf7,(02) },
{ AORB, yxorb, Pb, 0x0c,0x80,(01),0x08,0x0a },
{ AORL, yxorl, Px, 0x83,(01),0x0d,0x81,(01),0x09,0x0b },
+ { AORPD, yxm, Pq, 0x56 },
+ { AORPS, yxm, Pm, 0x56 },
{ AORW, yxorl, Pe, 0x83,(01),0x0d,0x81,(01),0x09,0x0b },
{ AOUTB, yin, Pb, 0xe6,0xee },
{ AOUTL, yin, Px, 0xe7,0xef },
@@ -453,6 +662,44 @@
{ AOUTSB, ynone, Pb, 0x6e },
{ AOUTSL, ynone, Px, 0x6f },
{ AOUTSW, ynone, Pe, 0x6f },
+ { APACKSSLW, ymm, Px, 0x6b,Pe,0x6b },
+ { APACKSSWB, ymm, Px, 0x63,Pe,0x63 },
+ { APACKUSWB, ymm, Px, 0x67,Pe,0x67 },
+ { APADDB, ymm, Px, 0xfc,Pe,0xfc },
+ { APADDL, ymm, Px, 0xfe,Pe,0xfe },
+ { APADDQ, yxm, Pe, 0xd4 },
+ { APADDSB, ymm, Px, 0xec,Pe,0xec },
+ { APADDSW, ymm, Px, 0xed,Pe,0xed },
+ { APADDUSB, ymm, Px, 0xdc,Pe,0xdc },
+ { APADDUSW, ymm, Px, 0xdd,Pe,0xdd },
+ { APADDW, ymm, Px, 0xfd,Pe,0xfd },
+ { APAND, ymm, Px, 0xdb,Pe,0xdb },
+ { APANDN, ymm, Px, 0xdf,Pe,0xdf },
+ { APAVGB, ymm, Px, 0xe0,Pe,0xe0 },
+ { APAVGW, ymm, Px, 0xe3,Pe,0xe3 },
+ { APCMPEQB, ymm, Px, 0x74,Pe,0x74 },
+ { APCMPEQL, ymm, Px, 0x76,Pe,0x76 },
+ { APCMPEQW, ymm, Px, 0x75,Pe,0x75 },
+ { APCMPGTB, ymm, Px, 0x64,Pe,0x64 },
+ { APCMPGTL, ymm, Px, 0x66,Pe,0x66 },
+ { APCMPGTW, ymm, Px, 0x65,Pe,0x65 },
+ { APEXTRW, yextrw, Pq, 0xc5 },
+ { APF2IL, ymfp, Px, 0x1d },
+ { APF2IW, ymfp, Px, 0x1c },
+ { API2FL, ymfp, Px, 0x0d },
+ { API2FW, ymfp, Px, 0x0c },
+ { APINSRW, yextrw, Pq, 0xc4 },
+ { APMADDWL, ymm, Px, 0xf5,Pe,0xf5 },
+ { APMAXSW, yxm, Pe, 0xee },
+ { APMAXUB, yxm, Pe, 0xde },
+ { APMINSW, yxm, Pe, 0xea },
+ { APMINUB, yxm, Pe, 0xda },
+ { APMOVMSKB, ymskb, Px, Pe,0xd7,0xd7 },
+ { APMULHRW, ymfp, Px, 0xb7 },
+ { APMULHUW, ymm, Px, 0xe4,Pe,0xe4 },
+ { APMULHW, ymm, Px, 0xe5,Pe,0xe5 },
+ { APMULLW, ymm, Px, 0xd5,Pe,0xd5 },
+ { APMULULQ, ymm, Px, 0xf4,Pe,0xf4 },
{ APOPAL, ynone, Px, 0x61 },
{ APOPAW, ynone, Pe, 0x61 },
{ APOPFL, ynone, Px, 0x9d },
@@ -459,6 +706,38 @@
{ APOPFW, ynone, Pe, 0x9d },
{ APOPL, ypopl, Px, 0x58,0x8f,(00) },
{ APOPW, ypopl, Pe, 0x58,0x8f,(00) },
+ { APOR, ymm, Px, 0xeb,Pe,0xeb },
+ { APSADBW, yxm, Pq, 0xf6 },
+ { APSHUFHW, yxshuf, Pf3, 0x70 },
+ { APSHUFL, yxshuf, Pq, 0x70 },
+ { APSHUFLW, yxshuf, Pf2, 0x70 },
+ { APSHUFW, ymshuf, Pm, 0x70 },
+ { APSLLO, ypsdq, Pq, 0x73,(07) },
+ { APSLLL, yps, Px, 0xf2, 0x72,(06), Pe,0xf2, Pe,0x72,(06) },
+ { APSLLQ, yps, Px, 0xf3, 0x73,(06), Pe,0xf3, Pe,0x73,(06) }, /* imm8 forms share 0x73 /6 for MMX and XMM */
+ { APSLLW, yps, Px, 0xf1, 0x71,(06), Pe,0xf1, Pe,0x71,(06) },
+ { APSRAL, yps, Px, 0xe2, 0x72,(04), Pe,0xe2, Pe,0x72,(04) },
+ { APSRAW, yps, Px, 0xe1, 0x71,(04), Pe,0xe1, Pe,0x71,(04) },
+ { APSRLO, ypsdq, Pq, 0x73,(03) },
+ { APSRLL, yps, Px, 0xd2, 0x72,(02), Pe,0xd2, Pe,0x72,(02) },
+ { APSRLQ, yps, Px, 0xd3, 0x73,(02), Pe,0xd3, Pe,0x73,(02) },
+ { APSRLW, yps, Px, 0xd1, 0x71,(02), Pe,0xd1, Pe,0x71,(02) }, /* XMM register form is 0xd1; 0xe1 is PSRAW */
+ { APSUBB, yxm, Pe, 0xf8 },
+ { APSUBL, yxm, Pe, 0xfa },
+ { APSUBQ, yxm, Pe, 0xfb },
+ { APSUBSB, yxm, Pe, 0xe8 },
+ { APSUBSW, yxm, Pe, 0xe9 },
+ { APSUBUSB, yxm, Pe, 0xd8 },
+ { APSUBUSW, yxm, Pe, 0xd9 },
+ { APSUBW, yxm, Pe, 0xf9 },
+ { APUNPCKHBW, ymm, Px, 0x68,Pe,0x68 },
+ { APUNPCKHLQ, ymm, Px, 0x6a,Pe,0x6a },
+ { APUNPCKHQDQ, yxm, Pe, 0x6d },
+ { APUNPCKHWL, ymm, Px, 0x69,Pe,0x69 },
+ { APUNPCKLBW, ymm, Px, 0x60,Pe,0x60 },
+ { APUNPCKLLQ, ymm, Px, 0x62,Pe,0x62 },
+ { APUNPCKLQDQ, yxm, Pe, 0x6c },
+ { APUNPCKLWL, ymm, Px, 0x61,Pe,0x61 },
{ APUSHAL, ynone, Px, 0x60 },
{ APUSHAW, ynone, Pe, 0x60 },
{ APUSHFL, ynone, Px, 0x9c },
@@ -465,9 +744,12 @@
{ APUSHFW, ynone, Pe, 0x9c },
{ APUSHL, ypushl, Px, 0x50,0xff,(06),0x6a,0x68 },
{ APUSHW, ypushl, Pe, 0x50,0xff,(06),0x6a,0x68 },
+ { APXOR, ymm, Px, 0xef,Pe,0xef },
{ ARCLB, yshb, Pb, 0xd0,(02),0xc0,(02),0xd2,(02) },
{ ARCLL, yshl, Px, 0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02) },
{ ARCLW, yshl, Pe, 0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02) },
+ { ARCPPS, yxm, Pm, 0x53 },
+ { ARCPSS, yxm, Pf3, 0x53 },
{ ARCRB, yshb, Pb, 0xd0,(03),0xc0,(03),0xd2,(03) },
{ ARCRL, yshl, Px, 0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03) },
{ ARCRW, yshl, Pe, 0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03) },
@@ -480,6 +762,8 @@
{ ARORB, yshb, Pb, 0xd0,(01),0xc0,(01),0xd2,(01) },
{ ARORL, yshl, Px, 0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01) },
{ ARORW, yshl, Pe, 0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01) },
+ { ARSQRTPS, yxm, Pm, 0x52 },
+ { ARSQRTSS, yxm, Pf3, 0x52 },
{ ASAHF, ynone, Px, 0x9e },
{ ASALB, yshb, Pb, 0xd0,(04),0xc0,(04),0xd2,(04) },
{ ASALL, yshl, Px, 0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04) },
@@ -517,14 +801,25 @@
{ ASHRB, yshb, Pb, 0xd0,(05),0xc0,(05),0xd2,(05) },
{ ASHRL, yshl, Px, 0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05) },
{ ASHRW, yshl, Pe, 0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05) },
+ { ASHUFPD, yxshuf, Pq, 0xc6 },
+ { ASHUFPS, yxshuf, Pm, 0xc6 },
+ { ASQRTPD, yxm, Pe, 0x51 },
+ { ASQRTPS, yxm, Pm, 0x51 },
+ { ASQRTSD, yxm, Pf2, 0x51 },
+ { ASQRTSS, yxm, Pf3, 0x51 },
{ ASTC, ynone, Px, 0xf9 },
{ ASTD, ynone, Px, 0xfd },
{ ASTI, ynone, Px, 0xfb },
+ { ASTMXCSR, ysvrs, Pm, 0xae,(03),0xae,(03) },
{ ASTOSB, ynone, Pb, 0xaa },
{ ASTOSL, ynone, Px, 0xab },
{ ASTOSW, ynone, Pe, 0xab },
{ ASUBB, yxorb, Pb, 0x2c,0x80,(05),0x28,0x2a },
{ ASUBL, yaddl, Px, 0x83,(05),0x2d,0x81,(05),0x29,0x2b },
+ { ASUBPD, yxm, Pe, 0x5c },
+ { ASUBPS, yxm, Pm, 0x5c },
+ { ASUBSD, yxm, Pf2, 0x5c },
+ { ASUBSS, yxm, Pf3, 0x5c },
{ ASUBW, yaddl, Pe, 0x83,(05),0x2d,0x81,(05),0x29,0x2b },
{ ASYSCALL, ynone, Px, 0xcd,100 },
{ ATESTB, ytestb, Pb, 0xa8,0xf6,(00),0x84,0x84 },
@@ -531,6 +826,12 @@
{ ATESTL, ytestl, Px, 0xa9,0xf7,(00),0x85,0x85 },
{ ATESTW, ytestl, Pe, 0xa9,0xf7,(00),0x85,0x85 },
{ ATEXT, ytext, Px },
+ { AUCOMISD, yxcmp, Pe, 0x2e },
+ { AUCOMISS, yxcmp, Pm, 0x2e },
+ { AUNPCKHPD, yxm, Pe, 0x15 },
+ { AUNPCKHPS, yxm, Pm, 0x15 },
+ { AUNPCKLPD, yxm, Pe, 0x14 },
+ { AUNPCKLPS, yxm, Pm, 0x14 },
{ AVERR, ydivl, Pm, 0x00,(04) },
{ AVERW, ydivl, Pm, 0x00,(05) },
{ AWAIT, ynone, Px, 0x9b },
@@ -541,6 +842,8 @@
{ AXLAT, ynone, Px, 0xd7 },
{ AXORB, yxorb, Pb, 0x34,0x80,(06),0x30,0x32 },
{ AXORL, yxorl, Px, 0x83,(06),0x35,0x81,(06),0x31,0x33 },
+ { AXORPD, yxm, Pe, 0x57 },
+ { AXORPS, yxm, Pm, 0x57 },
{ AXORW, yxorl, Pe, 0x83,(06),0x35,0x81,(06),0x31,0x33 },
{ AFMOVB, yfmvx, Px, 0xdf,(04) },
@@ -649,6 +952,9 @@
{ AFXTRACT, ynone, Px, 0xd9, 0xf4 },
{ AFYL2X, ynone, Px, 0xd9, 0xf1 },
{ AFYL2XP1, ynone, Px, 0xd9, 0xf9 },
+
{ AEND },
0
};
+
+Optab* opindex[ALAST+1];
--- a/sys/src/cmd/8l/span.c
+++ b/sys/src/cmd/8l/span.c
@@ -326,7 +326,7 @@
{
long v;
- if(a->type >= D_INDIR || a->index != D_NONE) {
+ if((a->type >= D_INDIR && a->type < D_M0) || a->index != D_NONE) {
if(a->index != D_NONE && a->scale == 0) {
if(a->type == D_ADDR) {
switch(a->index) {
@@ -387,6 +387,26 @@
case D_F0+7:
return Yrf;
+ case D_M0+0:
+ case D_M0+1:
+ case D_M0+2:
+ case D_M0+3:
+ case D_M0+4:
+ case D_M0+5:
+ case D_M0+6:
+ case D_M0+7:
+ return Ymr;
+
+ case D_X0+0:
+ case D_X0+1:
+ case D_X0+2:
+ case D_X0+3:
+ case D_X0+4:
+ case D_X0+5:
+ case D_X0+6:
+ case D_X0+7:
+ return Yxr;
+
case D_NONE:
return Ynone;
@@ -576,7 +596,7 @@
v = a->offset;
t = a->type;
if(a->index != D_NONE) {
- if(t >= D_INDIR) {
+ if(t >= D_INDIR && t < D_M0) {
t -= D_INDIR;
if(t == D_NONE) {
*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
@@ -624,7 +644,13 @@
*andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
return;
}
- if(t >= D_INDIR) {
+ if(t >= D_M0 && t <= D_X0+7) {
+ if(v)
+ goto bad;
+ *andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
+ return;
+ }
+ if(t >= D_INDIR && t < D_M0) {
t -= D_INDIR;
if(t == D_NONE || D_CS <= t && t <= D_GS) {
*andptr++ = (0 << 6) | (5 << 0) | (r << 3);
@@ -835,6 +861,30 @@
print("%P\n", p);
}
+static int
+mediaop(Optab *o, int op, int osize, int z)
+{
+ switch(op){
+ case Pm:
+ case Pe:
+ case Pf2:
+ case Pf3:
+ if(osize != 1){
+ if(op != Pm)
+ *andptr++ = op;
+ *andptr++ = Pm;
+ op = o->op[++z];
+ break;
+ }
+ default:
+ if(andptr == and || andptr[-1] != Pm)
+ *andptr++ = Pm;
+ break;
+ }
+ *andptr++ = op;
+ return z;
+}
+
void
doasm(Prog *p)
{
@@ -851,7 +901,11 @@
if(pre)
*andptr++ = pre;
- o = &optab[p->as];
+ o = opindex[p->as];
+ if(o == nil) { /* opcode has no optab row (e.g. APSWAPL, APANDB): report instead of dereferencing nil */
+ diag("asmins: missing op %P", p);
+ return;
+ }
ft = oclass(&p->from) * Ymax;
tt = oclass(&p->to) * Ymax;
t = o->ytab;
@@ -872,6 +922,12 @@
*andptr++ = Pm;
break;
+ case Pf2: /* xmm opcode escape */
+ case Pf3:
+ *andptr++ = o->prefix;
+ *andptr++ = Pm;
+ break;
+
case Pm: /* opcode escape */
*andptr++ = Pm;
break;
@@ -903,6 +959,30 @@
asmand(&p->from, reg[p->to.type]);
break;
+ case Zm_r_xm:
+ mediaop(o, op, t[3], z);
+ asmand(&p->from, reg[p->to.type]);
+ break;
+
+ case Zm_r_i_xm:
+ mediaop(o, op, t[3], z);
+ asmand(&p->from, reg[p->to.type]);
+ *andptr++ = p->to.offset;
+ break;
+
+ case Zm_r_3d:
+ *andptr++ = 0x0f;
+ *andptr++ = 0x0f;
+ asmand(&p->from, reg[p->to.type]);
+ *andptr++ = op;
+ break;
+
+ case Zibm_r:
+ *andptr++ = op;
+ asmand(&p->from, reg[p->to.type]);
+ *andptr++ = p->to.offset;
+ break;
+
case Zaut_r:
*andptr++ = 0x8d; /* leal */
if(p->from.type != D_ADDR)
@@ -924,6 +1004,17 @@
asmand(&p->to, reg[p->from.type]);
break;
+ case Zr_m_xm:
+ mediaop(o, op, t[3], z);
+ asmand(&p->to, reg[p->from.type]);
+ break;
+
+ case Zr_m_i_xm:
+ mediaop(o, op, t[3], z);
+ asmand(&p->to, reg[p->from.type]);
+ *andptr++ = p->from.offset;
+ break;
+
case Zo_m:
*andptr++ = op;
asmand(&p->to, o->op[z+1]);
@@ -941,6 +1032,12 @@
asmand(&p->to, o->op[z+1]);
*andptr++ = v;
break;
+
+ case Zibo_m_xm:
+ z = mediaop(o, op, t[3], z);
+ asmand(&p->to, o->op[z+1]);
+ *andptr++ = v;
+ break;
case Z_ib:
v = vaddr(&p->to);