ref: 66b6185845e85258f1408271d5f705aacfa6ffdb
parent: 753a35b52ac098985aff5e22a069d30d16903385
author: Sigrid <[email protected]>
date: Sun Dec 6 13:48:32 EST 2020
amd64, vmx: support avx/avx2 for host/guest; use *noavx= in plan9.ini to disable
--- a/sys/man/8/plan9.ini
+++ b/sys/man/8/plan9.ini
@@ -898,6 +898,8 @@
battery life (see
.IR stats (8)).
It is not on by default because it causes problems on some laptops.
+.SS \fL*noavx=\fP
+Disables AVX and AVX2 on AMD64 CPUs.
.SS USB
.SS \fL*nousbprobe=\fP
Disable USB host controller detection.
--- a/sys/src/9/pc/cputemp.c
+++ b/sys/src/9/pc/cputemp.c
@@ -13,7 +13,7 @@
if(m->cpuiddx & Acpif)
if(strcmp(m->cpuidid, "GenuineIntel") == 0){
- cpuid(6, regs);
+ cpuid(6, 0, regs);
return regs[0] & 1;
}
return 0;
@@ -28,7 +28,7 @@
ulong regs[4];
static ulong tj;
- cpuid(6, regs);
+ cpuid(6, 0, regs);
if((regs[0] & 1) == 0)
goto unsup;
if(tj == 0){
--- a/sys/src/9/pc/dat.h
+++ b/sys/src/9/pc/dat.h
@@ -250,7 +250,7 @@
int pdbfree;
u32int dr7; /* shadow copy of dr7 */
-
+ u32int xcr0;
void* vmx;
int stack[1];
--- a/sys/src/9/pc/devarch.c
+++ b/sys/src/9/pc/devarch.c
@@ -18,11 +18,6 @@
Qmax = 32,
};
-enum {
- CR4Osfxsr = 1 << 9,
- CR4Oxmmex = 1 << 10,
-};
-
enum { /* cpuid standard function codes */
Highstdfunc = 0, /* also returns vendor string */
Procsig,
@@ -507,13 +502,13 @@
ulong regs[4];
vlong mca, mct, pat;
- cpuid(Highstdfunc, regs);
+ cpuid(Highstdfunc, 0, regs);
memmove(m->cpuidid, &regs[1], BY2WD); /* bx */
memmove(m->cpuidid+4, &regs[3], BY2WD); /* dx */
memmove(m->cpuidid+8, &regs[2], BY2WD); /* cx */
m->cpuidid[12] = '\0';
- cpuid(Procsig, regs);
+ cpuid(Procsig, 0, regs);
m->cpuidax = regs[0];
m->cpuidcx = regs[2];
m->cpuiddx = regs[3];
@@ -650,15 +645,6 @@
if(m->cpuiddx & Mtrr)
mtrrsync();
- if((m->cpuiddx & (Sse|Fxsr)) == (Sse|Fxsr)){ /* have sse fp? */
- fpsave = fpssesave;
- fprestore = fpsserestore;
- putcr4(getcr4() | CR4Osfxsr|CR4Oxmmex);
- } else {
- fpsave = fpx87save;
- fprestore = fpx87restore;
- }
-
if(strcmp(m->cpuidid, "GenuineIntel") == 0 && (m->cpuidcx & Rdrnd) != 0)
hwrandbuf = rdrandbuf;
else
@@ -669,9 +655,9 @@
m->havewatchpt8 = 1;
/* check and enable NX bit */
- cpuid(Highextfunc, regs);
+ cpuid(Highextfunc, 0, regs);
if(regs[0] >= Procextfeat){
- cpuid(Procextfeat, regs);
+ cpuid(Procextfeat, 0, regs);
if((regs[3] & (1<<20)) != 0){
vlong efer;
@@ -689,13 +675,15 @@
|| family == 6 && (model == 15 || model == 23 || model == 28))
m->havewatchpt8 = 1;
/* Intel SDM claims amd64 support implies 8-byte watchpoint support */
- cpuid(Highextfunc, regs);
+ cpuid(Highextfunc, 0, regs);
if(regs[0] >= Procextfeat){
- cpuid(Procextfeat, regs);
+ cpuid(Procextfeat, 0, regs);
if((regs[3] & 1<<29) != 0)
m->havewatchpt8 = 1;
}
}
+
+ fpuinit();
cputype = t;
return t->family;
--- a/sys/src/9/pc/devvmx.c
+++ b/sys/src/9/pc/devvmx.c
@@ -44,6 +44,7 @@
PROCB_CTLS = 0x4002,
PROCB_IRQWIN = 1<<2,
+ PROCB_TSCOFFSET = 1<<3,
PROCB_EXITHLT = 1<<7,
PROCB_EXITINVLPG = 1<<9,
PROCB_EXITMWAIT = 1<<10,
@@ -100,6 +101,7 @@
VMENTRY_INTRCODE = 0x4018,
VMENTRY_INTRILEN = 0x401a,
+ VMCS_TSC_OFFSET = 0x2010,
VMCS_LINK = 0x2800,
GUEST_ES = 0x800,
@@ -264,7 +266,9 @@
int index, machno;
char errstr[ERRMAX];
Ureg ureg;
+ uvlong tscoffset;
uintptr cr2;
+ uintptr xcr0;
uintptr dr[8]; /* DR7 is also kept in VMCS */
u8int launched;
u8int vpid;
@@ -484,6 +488,13 @@
}
static int
+xcr0write(Vmx *vmx, char *s) /* write handler for the "xcr0" guest register entry */
+{
+ vmx->xcr0 = parseval(s) & 7; /* keep only bits 0-2; higher bits are silently dropped */
+ return 0;
+}
+
+static int
readonly(Vmx *, char *)
{
return -1;
@@ -581,6 +592,7 @@
{VMXVAR(dr[2]), 0, "dr2"},
{VMXVAR(dr[3]), 0, "dr3"},
{VMXVAR(dr[6]), 0, "dr6", nil, dr6write},
+ {VMXVAR(xcr0), 0, "xcr0", nil, xcr0write},
{GUEST_DR7, 0, "dr7", nil, dr7write},
{VM_INSTRERR, 4, "instructionerror", nil, readonly},
{VM_EXREASON, 4, "exitreason", nil, readonly},
@@ -857,7 +869,7 @@
vlong msr;
int i;
- cpuid(1, regs);
+ cpuid(1, 0, regs);
if((regs[2] & 1<<5) == 0) return;
/* check if disabled by BIOS */
if(rdmsr(0x3a, &msr) < 0) return;
@@ -945,8 +957,8 @@
if(rdmsr(VMX_PROCB_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_PROCB_CTLS_MSR failed");
x = (u32int)procb_ctls | 1<<1 | 7<<4 | 1<<8 | 1<<13 | 1<<14 | 1<<26; /* currently reserved default1 bits */
- x |= PROCB_EXITHLT | PROCB_EXITMWAIT;
- x |= PROCB_EXITMOVDR | PROCB_EXITIO | PROCB_EXITMONITOR | PROCB_MSRBITMAP;
+ x |= PROCB_TSCOFFSET | PROCB_EXITMWAIT | PROCB_EXITMONITOR | PROCB_EXITHLT;
+ x |= PROCB_EXITMOVDR | PROCB_EXITIO | PROCB_MSRBITMAP;
x |= PROCB_USECTLS2;
x &= msr >> 32;
vmcswrite(PROCB_CTLS, x);
@@ -1042,8 +1054,8 @@
vmx->onentry = FLUSHVPID | FLUSHEPT;
fpinit();
- fpsave(&vmx->fp);
-
+ vmx->xcr0 = m->xcr0 & 1; /* x87 alone */
+
memset(vmx->msrbits, -1, 4096);
vmxtrapmsr(vmx, Efer, 0);
vmcswrite(VMENTRY_MSRLDADDR, PADDR(vmx->msrguest));
@@ -1051,6 +1063,9 @@
vmcswrite(VMEXIT_MSRLDADDR, PADDR(vmx->msrhost));
vmcswrite(MSR_BITMAP, PADDR(vmx->msrbits));
+ cycles(&vmx->tscoffset);
+ vmcswrite(VMCS_TSC_OFFSET, vmx->tscoffset);
+
if(sizeof(uintptr) == 8){
vmxaddmsr(vmx, Star, 0);
vmxaddmsr(vmx, Lstar, 0);
@@ -1074,7 +1089,7 @@
uintptr cr;
vlong x;
- putcr4(getcr4() | 0x2000); /* set VMXE */
+ putcr4(getcr4() | CR4VMXE);
putcr0(getcr0() | 0x20); /* set NE */
cr = getcr0();
if(rdmsr(VMX_CR0_FIXED0, &msr) < 0) error("rdmsr(VMX_CR0_FIXED0) failed");
@@ -1590,8 +1605,9 @@
static void
vmxproc(void *vmxp)
{
- int init, rc, x;
+ int init, rc, x, useend;
u32int procbctls, defprocbctls;
+ u64int start, end, adj;
vlong v;
Vmx *vmx;
@@ -1599,6 +1615,8 @@
procwired(up, vmx->machno);
sched();
init = 0;
+ useend = 0;
+ adj = 0;
defprocbctls = 0;
while(waserror()){
kstrcpy(vmx->errstr, up->errstr, ERRMAX);
@@ -1653,11 +1671,29 @@
}
if((vmx->dr[7] & ~0xd400) != 0)
putdr01236(vmx->dr);
- fpsserestore(&vmx->fp);
- putcr2(vmx->cr2);
+
+ fprestore(&vmx->fp);
+ if(m->xcr0 != 0 && vmx->xcr0 != m->xcr0)
+ putxcr0(vmx->xcr0);
+ if(vmx->cr2 != getcr2())
+ putcr2(vmx->cr2);
+ cycles(&start);
+ if(useend){
+ vmx->tscoffset -= end - start + adj;
+ vmcswrite(VMCS_TSC_OFFSET, vmx->tscoffset);
+ }
+ if(adj == 0){
+ cycles(&adj);
+ adj -= start;
+ }
rc = vmlaunch(&vmx->ureg, vmx->launched);
+ cycles(&end);
+ useend = 1;
vmx->cr2 = getcr2();
- fpssesave(&vmx->fp);
+ if(m->xcr0 != 0 && vmx->xcr0 != m->xcr0)
+ putxcr0(m->xcr0);
+ fpsave(&vmx->fp);
+
splx(x);
if(rc < 0)
error("vmlaunch failed");
@@ -1799,6 +1835,7 @@
free(vmx);
nexterror();
}
+ memset(vmx, 0, sizeof(Vmx));
vmx->state = VMXINIT;
vmx->lastcmd = &vmx->firstcmd;
vmx->mem.next = &vmx->mem;
--- a/sys/src/9/pc/fns.h
+++ b/sys/src/9/pc/fns.h
@@ -15,7 +15,8 @@
int (*cmpswap)(long*, long, long);
int cmpswap486(long*, long, long);
void (*coherence)(void);
-void cpuid(int, ulong regs[]);
+void cpuid(int, int, ulong regs[]);
+void fpuinit(void);
int cpuidentify(void);
void cpuidprint(void);
void (*cycles)(uvlong*);
@@ -138,6 +139,7 @@
void putcr2(ulong);
void putcr3(ulong);
void putcr4(ulong);
+void putxcr0(ulong);
void putdr(u32int*);
void putdr01236(uintptr*);
void putdr6(u32int);
--- /dev/null
+++ b/sys/src/9/pc/fpu.c
@@ -1,0 +1,31 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+enum {
+ CR4Osfxsr = 1 << 9,
+ CR4Oxmmex = 1 << 10,
+};
+
+void
+putxcr0(ulong) /* stub: fpuinit in this port never sets m->xcr0, so there is nothing to load */
+{
+}
+
+void
+fpuinit(void) /* select the fpsave/fprestore pair for this cpu */
+{
+ uintptr cr4;
+
+ if((m->cpuiddx & (Sse|Fxsr)) == (Sse|Fxsr)){ /* have sse fp? */
+ fpsave = fpssesave;
+ fprestore = fpsserestore;
+ cr4 = getcr4() | CR4Osfxsr|CR4Oxmmex; /* set both bits on top of the current CR4 */
+ putcr4(cr4);
+ } else { /* sse and fxsr not both present: use the x87 routines */
+ fpsave = fpx87save;
+ fprestore = fpx87restore;
+ }
+}
--- a/sys/src/9/pc/l.s
+++ b/sys/src/9/pc/l.s
@@ -520,7 +520,7 @@
* a 386 (Ac bit can't be set). If it's not a 386 and the Id bit can't be
* toggled then it's an older 486 of some kind.
*
- * cpuid(fun, regs[4]);
+ * cpuid(fn, sublvl, regs[4]);
*/
TEXT cpuid(SB), $0
MOVL $0x240000, AX
@@ -539,6 +539,7 @@
TESTL $0x200000, AX /* Id */
JZ _cpu486 /* can't toggle this bit on some 486 */
MOVL fn+0(FP), AX
+ MOVL sublvl+4(FP), CX
CPUID
JMP _cpuid
_cpu486:
@@ -555,7 +556,7 @@
XORL CX, CX
XORL DX, DX
_cpuid:
- MOVL regs+4(FP), BP
+ MOVL regs+8(FP), BP
MOVL AX, 0(BP)
MOVL BX, 4(BP)
MOVL CX, 8(BP)
--- a/sys/src/9/pc/mkfile
+++ b/sys/src/9/pc/mkfile
@@ -49,6 +49,7 @@
OBJ=\
l.$O\
cga.$O\
+ fpu.$O\
i8253.$O\
i8259.$O\
main.$O\
--- a/sys/src/9/pc/mtrr.c
+++ b/sys/src/9/pc/mtrr.c
@@ -289,9 +289,9 @@
ulong regs[4];
uvlong mask;
- cpuid(Exthighfunc, regs);
+ cpuid(Exthighfunc, 0, regs);
if(regs[0] >= Extaddrsz) { /* ax */
- cpuid(Extaddrsz, regs);
+ cpuid(Extaddrsz, 0, regs);
mask = (1ULL << (regs[0] & 0xFF)) - 1; /* ax */
} else {
mask = (1ULL << 36) - 1;
--- a/sys/src/9/pc64/dat.h
+++ b/sys/src/9/pc64/dat.h
@@ -2,6 +2,8 @@
typedef struct BIOS32ci BIOS32ci;
typedef struct Conf Conf;
typedef struct Confmem Confmem;
+typedef struct FPssestate FPssestate;
+typedef struct FPavxstate FPavxstate;
typedef struct FPsave FPsave;
typedef struct PFPU PFPU;
typedef struct ISAConf ISAConf;
@@ -49,7 +51,7 @@
uintptr pc;
};
-struct FPsave
+struct FPssestate
{
u16int fcw; /* x87 control word */
u16int fsw; /* x87 status word */
@@ -65,6 +67,18 @@
uchar ign[96]; /* reserved, ignored */
};
+struct FPavxstate
+{
+ FPssestate;
+ uchar header[64]; /* XSAVE header */
+ uchar ymm[256]; /* upper 128-bit regs (AVX) */
+};
+
+struct FPsave
+{
+ FPavxstate;
+};
+
enum
{
/* this is a state */
@@ -224,9 +238,12 @@
int havewatchpt8;
int havenx;
uvlong tscticks;
-
+
u64int dr7; /* shadow copy of dr7 */
-
+ u64int xcr0;
+ u32int fpsavesz;
+ u32int fpalign;
+
void* vmx;
uintptr stack[1];
@@ -270,8 +287,14 @@
/* cpuid instruction result register bits */
enum {
+ /* ax */
+ Xsaveopt = 1<<0,
+ Xsaves = 1<<3,
+
/* cx */
Monitor = 1<<3,
+ Xsave = 1<<26,
+ Avx = 1<<28,
/* dx */
Fpuonchip = 1<<0,
--- a/sys/src/9/pc64/fns.h
+++ b/sys/src/9/pc64/fns.h
@@ -15,7 +15,8 @@
int (*cmpswap)(long*, long, long);
int cmpswap486(long*, long, long);
void (*coherence)(void);
-void cpuid(int, ulong regs[]);
+void cpuid(int, int, ulong regs[]);
+void fpuinit(void);
int cpuidentify(void);
void cpuidprint(void);
void (*cycles)(uvlong*);
@@ -40,6 +41,11 @@
void (*fpsave)(FPsave*);
void fpsserestore(FPsave*);
void fpssesave(FPsave*);
+void fpxrestore(FPsave*);
+void fpxrestores(FPsave*);
+void fpxsave(FPsave*);
+void fpxsaveopt(FPsave*);
+void fpxsaves(FPsave*);
void fpx87restore(FPsave*);
void fpx87save(FPsave*);
int fpusave(void);
@@ -48,6 +54,7 @@
u64int getcr2(void);
u64int getcr3(void);
u64int getcr4(void);
+u64int getxcr0(void);
u64int getdr6(void);
char* getconf(char*);
void guesscpuhz(int);
@@ -138,6 +145,7 @@
void putcr2(u64int);
void putcr3(u64int);
void putcr4(u64int);
+void putxcr0(u64int);
void putdr(u64int*);
void putdr01236(u64int*);
void putdr6(u64int);
--- /dev/null
+++ b/sys/src/9/pc64/fpu.c
@@ -1,0 +1,51 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+enum {
+ CR4Osfxsr = 1 << 9,
+ CR4Oxmmex = 1 << 10,
+ CR4Oxsave = 1 << 18,
+};
+
+void
+fpuinit(void) /* pick fpsave/fprestore and the save-area size/alignment for this cpu */
+{
+ uintptr cr4;
+ ulong regs[4];
+
+ m->fpsavesz = sizeof(FPssestate); /* defaults; overridden below only when xsave is enabled */
+ m->fpalign = 16;
+ if((m->cpuiddx & (Sse|Fxsr)) == (Sse|Fxsr)){ /* have sse fp? */
+ cr4 = getcr4() | CR4Osfxsr|CR4Oxmmex;
+ putcr4(cr4);
+ fpsave = fpssesave;
+ fprestore = fpsserestore;
+
+ if((m->cpuidcx & (Xsave|Avx)) == (Xsave|Avx) && getconf("*noavx") == nil){ /* *noavx= in plan9.ini opts out */
+ cr4 |= CR4Oxsave;
+ putcr4(cr4);
+ m->xcr0 = 7; /* x87, sse, avx */
+ putxcr0(m->xcr0);
+ fpsave = fpxsave;
+ fprestore = fpxrestore;
+
+ cpuid(0xd, 0, regs); /* leaf 0xd, sub-leaf 0 */
+ m->fpsavesz = regs[1]; /* bx; presumably the area size for the features now set in xcr0 -- confirm against the SDM */
+ m->fpalign = 64;
+
+ cpuid(0xd, 1, regs); /* sub-leaf 1: ax carries the Xsaveopt/Xsaves bits */
+ if(regs[0] & Xsaveopt)
+ fpsave = fpxsaveopt;
+ if(regs[0] & Xsaves){ /* overrides the Xsaveopt choice when both bits are set */
+ fpsave = fpxsaves;
+ fprestore = fpxrestores;
+ }
+ }
+ } else { /* sse and fxsr not both present: x87 routines, default size/alignment kept */
+ fpsave = fpx87save;
+ fprestore = fpx87restore;
+ }
+}
--- a/sys/src/9/pc64/l.s
+++ b/sys/src/9/pc64/l.s
@@ -249,9 +249,10 @@
*/
TEXT cpuid(SB), $-4
MOVL RARG, AX /* function in AX */
+ MOVL cx+8(FP), CX /* sub-level in CX */
CPUID
- MOVQ info+8(FP), BP
+ MOVQ info+16(FP), BP
MOVL AX, 0(BP)
MOVL BX, 4(BP)
MOVL CX, 8(BP)
@@ -399,6 +400,21 @@
MOVQ RARG, CR4
RET
+TEXT getxcr0(SB), 1, $-4 /* XCR0 - extended control */
+ XORQ CX, CX /* CX = 0 selects XCR0 */
+ WORD $0x010f; BYTE $0xd0 // XGETBV
+ SHLQ $32, DX /* fold DX (high half) and AX (low half) into one 64-bit value in AX */
+ ORQ DX, AX
+ RET
+
+TEXT putxcr0(SB), 1, $-4 /* XCR0 = RARG (64-bit value) */
+ XORQ CX, CX /* CX = 0 selects XCR0 */
+ MOVQ RARG, DX /* copy all 64 bits; a 32-bit move would make the shift below always yield 0 */
+ SHRQ $32, DX /* DX = high half */
+ MOVL RARG, AX /* AX = low half */
+ WORD $0x010f; BYTE $0xd1 // XSETBV
+ RET
+
TEXT mb386(SB), 1, $-4 /* hack */
TEXT mb586(SB), 1, $-4
XORL AX, AX
@@ -624,6 +640,36 @@
TEXT _fxsave(SB), 1, $-4
FXSAVE64 (RARG)
+ RET
+
+TEXT _xrstor(SB), 1, $-4 /* XRSTOR from the save area whose address is in RARG */
+ MOVL $7, AX /* DX:AX = 7, the same x87|sse|avx value fpuinit loads into xcr0 */
+ XORL DX, DX
+ BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x6d; BYTE $0x00 // XRSTOR (RARG)
+ RET
+
+TEXT _xrstors(SB), 1, $-4 /* XRSTORS from RARG; NOTE(review): main.c gains no extern for this in the same change */
+ MOVL $7, AX /* DX:AX = 7, the same x87|sse|avx value fpuinit loads into xcr0 */
+ XORL DX, DX
+ BYTE $0x48; BYTE $0x0f; BYTE $0xc7; BYTE $0x5d; BYTE $0x00 // XRSTORS (RARG)
+ RET
+
+TEXT _xsave(SB), 1, $-4 /* XSAVE into the save area whose address is in RARG */
+ MOVL $7, AX /* DX:AX = 7, the same x87|sse|avx value fpuinit loads into xcr0 */
+ XORL DX, DX
+ BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x65; BYTE $0x00 // XSAVE (RARG)
+ RET
+
+TEXT _xsaveopt(SB), 1, $-4 /* XSAVEOPT into the save area whose address is in RARG */
+ MOVL $7, AX /* DX:AX = 7, the same x87|sse|avx value fpuinit loads into xcr0 */
+ XORL DX, DX
+ BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x75; BYTE $0x00 // XSAVEOPT (RARG)
+ RET
+
+TEXT _xsaves(SB), 1, $-4 /* XSAVES into RARG; NOTE(review): main.c gains no extern for this in the same change */
+ MOVL $7, AX /* DX:AX = 7, the same x87|sse|avx value fpuinit loads into xcr0 */
+ XORL DX, DX
+ BYTE $0x48; BYTE $0x0f; BYTE $0xc7; BYTE $0x6d; BYTE $0x00 // XSAVES (RARG)
 RET
TEXT _fwait(SB), 1, $-4
--- a/sys/src/9/pc64/main.c
+++ b/sys/src/9/pc64/main.c
@@ -304,6 +304,9 @@
extern void _fninit(void);
extern void _fxrstor(void*);
extern void _fxsave(void*);
+extern void _xrstor(void*);
+extern void _xsave(void*);
+extern void _xsaveopt(void*);
extern void _fwait(void);
extern void _ldmxcsr(u32int);
extern void _stts(void);
@@ -333,6 +336,39 @@
_fxrstor(s);
}
+void
+fpxsave(FPsave *s) /* fpsave routine installed by fpuinit once xsave+avx are enabled */
+{
+ _xsave(s);
+ _stts();
+}
+void
+fpxrestore(FPsave *s) /* fprestore counterpart of fpxsave and fpxsaveopt */
+{
+ _clts();
+ _xrstor(s);
+}
+
+void
+fpxsaves(FPsave *s) /* fpsave routine fpuinit installs when cpuid 0xd/1 reports Xsaves */
+{
+ _xsaveopt(s); /* NOTE(review): same call as fpxsaveopt; l.s defines _xsaves but it is not declared or used here -- confirm intent */
+ _stts();
+}
+void
+fpxrestores(FPsave *s) /* fprestore routine fpuinit installs when cpuid 0xd/1 reports Xsaves */
+{
+ _clts();
+ _xrstor(s); /* NOTE(review): same call as fpxrestore; l.s defines _xrstors but it is not declared or used here -- confirm intent */
+}
+
+void
+fpxsaveopt(FPsave *s) /* fpsave routine fpuinit installs when cpuid 0xd/1 reports Xsaveopt */
+{
+ _xsaveopt(s);
+ _stts();
+}
+
static char* mathmsg[] =
{
nil, /* handled below */
@@ -452,7 +488,7 @@
up->fpstate |= FPkernel;
}
while(up->fpslot[index] == nil)
- up->fpslot[index] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
+ up->fpslot[index] = mallocalign(m->fpsavesz, m->fpalign, 0, 0);
up->fpsave = up->fpslot[index];
up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
break;
@@ -538,8 +574,8 @@
case FPinactive | FPpush:
case FPinactive:
while(p->fpslot[0] == nil)
- p->fpslot[0] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
- memmove(p->fpsave = p->fpslot[0], up->fpslot[0], sizeof(FPsave));
+ p->fpslot[0] = mallocalign(m->fpsavesz, m->fpalign, 0, 0);
+ memmove(p->fpsave = p->fpslot[0], up->fpslot[0], m->fpsavesz);
p->fpstate = FPinactive;
}
splx(s);
--- a/sys/src/9/pc64/mem.h
+++ b/sys/src/9/pc64/mem.h
@@ -26,7 +26,6 @@
#define ROUND(s, sz) (((s)+((sz)-1))&~((sz)-1))
#define PGROUND(s) ROUND(s, BY2PG)
#define BLOCKALIGN 8
-#define FPalign 16
#define MAXMACH 128 /* max # cpus system can run */
--- a/sys/src/9/pc64/mkfile
+++ b/sys/src/9/pc64/mkfile
@@ -47,6 +47,7 @@
OBJ=\
l.$O\
cga.$O\
+ fpu.$O\
i8253.$O\
i8259.$O\
main.$O\
--- a/sys/src/cmd/vmx/exith.c
+++ b/sys/src/cmd/vmx/exith.c
@@ -1,9 +1,8 @@
#include <u.h>
#include <libc.h>
-#include <thread.h>
-#include <bio.h>
#include "dat.h"
#include "fns.h"
+#include "x86.h"
int persist = 1;
@@ -118,109 +117,167 @@
typedef struct CPUID CPUID;
struct CPUID {
- u32int idx;
u32int ax, bx, cx, dx;
};
-static CPUID *cpuidf;
-static int ncpuidf;
+static u32int cpuidmax;
+static u32int cpuidmaxext;
+static CPUID leaf1;
+static struct {
+ uvlong miscen;
+}msr;
-static void
-auxcpuidproc(void *vpfd)
-{
- int *pfd;
-
- pfd = vpfd;
- close(pfd[1]);
- close(0);
- open("/dev/null", OREAD);
- dup(pfd[0], 1);
- close(pfd[0]);
- procexecl(nil, "/bin/aux/cpuid", "cpuid", "-r", nil);
- threadexits("exec: %r");
-}
+static uchar _cpuid[] = {
+ 0x5E, /* POP SI (PC) */
+ 0x5D, /* POP BP (CPUID&) */
+ 0x58, /* POP AX */
+ 0x59, /* POP CX */
+ 0x51, /* PUSH CX */
+ 0x50, /* PUSH AX */
+ 0x55, /* PUSH BP */
+ 0x56, /* PUSH SI */
+
+ 0x31, 0xDB, /* XOR BX, BX */
+ 0x31, 0xD2, /* XOR DX, DX */
+
+ 0x0F, 0xA2, /* CPUID */
+
+ 0x89, 0x45, 0x00, /* MOV AX, 0(BP) */
+ 0x89, 0x5d, 0x04, /* MOV BX, 4(BP) */
+ 0x89, 0x4d, 0x08, /* MOV CX, 8(BP) */
+ 0x89, 0x55, 0x0C, /* MOV DX, 12(BP) */
+ 0xC3, /* RET */
+};
+
+static CPUID (*getcpuid)(ulong ax, ulong cx) = (CPUID(*)(ulong, ulong)) _cpuid;
+
void
cpuidinit(void)
{
- int pfd[2];
- Biobuf *bp;
- char *l, *f[5];
- CPUID *cp;
-
- pipe(pfd);
- procrfork(auxcpuidproc, pfd, 4096, RFFDG);
- close(pfd[0]);
- bp = Bfdopen(pfd[1], OREAD);
- if(bp == nil) sysfatal("Bopenfd: %r");
- for(; l = Brdstr(bp, '\n', 1), l != nil; free(l)){
- if(tokenize(l, f, 5) < 5) continue;
- cpuidf = realloc(cpuidf, (ncpuidf + 1) * sizeof(CPUID));
- cp = cpuidf + ncpuidf++;
- cp->idx = strtoul(f[0], nil, 16);
- cp->ax = strtoul(f[1], nil, 16);
- cp->bx = strtoul(f[2], nil, 16);
- cp->cx = strtoul(f[3], nil, 16);
- cp->dx = strtoul(f[4], nil, 16);
+ CPUID r;
+ int f;
+
+ if(sizeof(uintptr) == 8) /* patch out POP BP -> POP AX */
+ _cpuid[1] = 0x58;
+ segflush(_cpuid, sizeof(_cpuid));
+
+ r = getcpuid(0, 0);
+ cpuidmax = r.ax;
+ r = getcpuid(0x80000000, 0);
+ cpuidmaxext = r.ax;
+ leaf1 = getcpuid(1, 0);
+
+ memset(&msr, 0, sizeof(msr));
+ if((f = open("/dev/msr", OREAD)) >= 0){
+ pread(f, &msr.miscen, 8, 0x1a0);
+ msr.miscen &= 1<<0; /* fast strings */
+ close(f);
}
- Bterm(bp);
- close(pfd[1]);
}
-CPUID *
-getcpuid(ulong idx)
-{
- CPUID *cp;
-
- for(cp = cpuidf; cp < cpuidf + ncpuidf; cp++)
- if(cp->idx == idx)
- return cp;
- return nil;
-}
+static int xsavesz[] = { /* xsave area size reported to the guest, indexed by an xcr0 value */
+ [1] = 512+64, /* x87: legacy area + 64-byte header */
+ [3] = 512+64, /* x87, sse: same layout */
+ [7] = 512+64+256, /* x87, sse, avx: adds 256 bytes of upper ymm halves */
+};
-int maxcpuid = 7;
-
static void
cpuid(ExitInfo *ei)
{
u32int ax, bx, cx, dx;
- CPUID *cp;
- static CPUID def;
-
+ CPUID cp;
+
ax = rget(RAX);
- cp = getcpuid(ax);
- if(cp == nil) cp = &def;
+ cx = rget(RCX);
+ bx = dx = 0;
+ cp = getcpuid(ax, cx);
switch(ax){
- case 0: /* highest register & GenuineIntel */
- ax = maxcpuid;
- bx = cp->bx;
- dx = cp->dx;
- cx = cp->cx;
+ case 0x00: /* highest register & GenuineIntel */
+ ax = MIN(cpuidmax, 0x18);
+ bx = cp.bx;
+ dx = cp.dx;
+ cx = cp.cx;
break;
- case 1: /* features */
- ax = cp->ax;
- bx = cp->bx & 0xffff;
- cx = cp->cx & 0x60de2203;
- dx = cp->dx & 0x0782a179;
+ case 0x01: /* features */
+ ax = cp.ax;
+ bx = cp.bx & 0xffff;
+ /* some features removed, hypervisor added */
+ cx = cp.cx & 0x76de3217 | 0x80000000UL;
+ dx = cp.dx & 0x0f8aa579;
+ if(leaf1.cx & 1<<27){
+ if(rget("cr4real") & Cr4Osxsave)
+ cx |= 1<<27;
+ }else{
+ cx &= ~0x1c000000;
+ }
break;
- case 2: goto literal; /* cache stuff */
- case 3: goto zero; /* processor serial number */
- case 4: goto zero; /* cache stuff */
- case 5: goto zero; /* monitor/mwait */
- case 6: goto zero; /* thermal management */
- case 7: goto zero; /* more features */
- case 10: goto zero; /* performance counters */
+ case 0x02: goto literal; /* cache stuff */
+ case 0x03: goto zero; /* processor serial number */
+ case 0x04: goto literal; /* cache stuff */
+ case 0x05: goto zero; /* monitor/mwait */
+ case 0x06: goto zero; /* thermal management */
+ case 0x07: /* more features */
+ if(cx == 0){
+ ax = 0;
+ bx = cp.bx & 0x2369;
+ cx = 0;
+ if((leaf1.cx & 1<<27) == 0)
+ bx &= ~0xdc230020;
+ }else{
+ goto zero;
+ }
+ break;
+ case 0x08: goto zero;
+ case 0x09: goto literal; /* direct cache access */
+ case 0x0a: goto zero; /* performance counters */
+ case 0x0b: goto zero; /* extended topology */
+ case 0x0c: goto zero;
+ case 0x0d: /* extended state */
+ if((leaf1.cx & 1<<27) == 0)
+ goto zero;
+ if(cx == 0){ /* main leaf */
+ ax = cp.ax & 7; /* x87, sse, avx */
+ bx = xsavesz[rget("xcr0")]; /* current xsave size */
+ cx = xsavesz[ax]; /* max xsave size */
+ }else if(cx == 1){ /* sub leaf */
+ ax = cp.ax & 7; /* xsaveopt, xsavec, xgetbv1 */
+ bx = xsavesz[rget("xcr0")];
+ cx = 0;
+ }else if(cx == 2){
+ ax = xsavesz[7] - xsavesz[3];
+ bx = xsavesz[3];
+ cx = 0;
+ }else{
+ goto zero;
+ }
+ break;
+ case 0x0f: goto zero; /* RDT */
+ case 0x10: goto zero; /* RDT */
+ case 0x12: goto zero; /* SGX */
+ case 0x14: goto zero; /* PT */
+ case 0x15: goto zero; /* TSC */
+ case 0x16: goto zero; /* cpu clock */
+ case 0x17: goto zero; /* SoC */
+ case 0x18: goto literal; /* pages, tlb */
+
+ case 0x40000000: /* hypervisor */
+ ax = 0;
+ bx = 0x4b4d564b; /* act as KVM */
+ cx = 0x564b4d56;
+ dx = 0x4d;
+ break;
+
case 0x80000000: /* highest register */
- ax = 0x80000008;
- bx = cx = dx = 0;
+ ax = MIN(cpuidmaxext, 0x80000008);
+ cx = 0;
break;
case 0x80000001: /* signature & ext features */
- ax = cp->ax;
- bx = 0;
- cx = cp->cx & 0x121;
+ ax = cp.ax;
+ cx = cp.cx & 0x121;
if(sizeof(uintptr) == 8)
- dx = cp->dx & 0x24100800;
+ dx = cp.dx & 0x24100800;
else
- dx = cp->dx & 0x04100000;
+ dx = cp.dx & 0x04100000;
break;
case 0x80000002: goto literal; /* brand string */
case 0x80000003: goto literal; /* brand string */
@@ -230,18 +287,16 @@
case 0x80000007: goto zero; /* invariant tsc */
case 0x80000008: goto literal; /* address bits */
literal:
- ax = cp->ax;
- bx = cp->bx;
- cx = cp->cx;
- dx = cp->dx;
+ ax = cp.ax;
+ bx = cp.bx;
+ cx = cp.cx;
+ dx = cp.dx;
break;
default:
- vmerror("unknown cpuid field eax=%#ux", ax);
+ if((ax & 0xf0000000) != 0x40000000)
+ vmerror("unknown cpuid field eax=%#ux", ax);
zero:
- ax = 0;
- bx = 0;
- cx = 0;
- dx = 0;
+ ax = cx = 0;
break;
}
rset(RAX, ax);
@@ -267,6 +322,9 @@
else rset("pat", val);
break;
case 0x8B: val = 0; break; /* microcode update */
+ case 0x1A0: /* IA32_MISC_ENABLE */
+ if(rd) val = msr.miscen;
+ break;
default:
if(rd){
vmerror("read from unknown MSR %#ux ignored", cx);
@@ -373,6 +431,26 @@
irqack(ei->qual);
}
+static void
+xsetbv(ExitInfo *ei)
+{
+ uvlong v;
+
+ /* emulate a guest XSETBV; #ud is tested before #gp. this should also #ud if LOCK prefix is used */
+
+ v = rget(RAX)&0xffffffff | rget(RDX)<<32; /* new value is DX:AX */
+ if((leaf1.cx & 1<<26) == 0 || (rget("cr4real") & Cr4Osxsave) == 0)
+ postexc("#ud", NOERRC); /* no xsave on the host cpu, or not enabled in the guest's real cr4 */
+ else if(rget(RCX) & 0xffffffff) /* only register index 0 (xcr0) is accepted */
+ postexc("#gp", 0);
+ else if(v != 1 && v != 3 && v != 7) /* x87; x87+sse; x87+sse+avx */
+ postexc("#gp", 0);
+ else{
+ rset("xcr0", v);
+ skipinstr(ei);
+ }
+}
+
typedef struct ExitType ExitType;
struct ExitType {
char *name;
@@ -389,6 +467,7 @@
{".movdr", movdr},
{"#db", dbgexc},
{"movcr", movcr},
+ {".xsetbv", xsetbv},
};
void
--- a/sys/src/cmd/vmx/fns.h
+++ b/sys/src/cmd/vmx/fns.h
@@ -1,3 +1,4 @@
+#define MIN(a,b) ((a)<(b)?(a):(b))
void *emalloc(ulong);
void loadkernel(char *);
uvlong rget(char *);
--- a/sys/src/cmd/vmx/x86.h
+++ b/sys/src/cmd/vmx/x86.h
@@ -22,8 +22,9 @@
enum {
Cr0Pg = 1<<31,
- Cr4Pse = 1<<4,
- Cr4Pae = 1<<5,
+ Cr4Pse = 1<<4,
+ Cr4Pae = 1<<5,
+ Cr4Osxsave = 1<<18,
EferLme = 1<<8,
};