ref: 8c1bde46f0aa97e9f018e7fb805f367e908fa379
parent: dbbae6d38405cdd817f84e2ace104bb27963a246
author: cinap_lenrek <[email protected]>
date: Sun Dec 6 16:07:30 EST 2020
pc, pc64: move all fpu specific code from main.c to fpu.c
--- a/sys/src/9/pc/fns.h
+++ b/sys/src/9/pc/fns.h
@@ -41,12 +41,6 @@
void fpoff(void);
void (*fprestore)(FPsave*);
void (*fpsave)(FPsave*);
-void fpsserestore(FPsave*);
-void fpssesave(FPsave*);
-void fpx87restore(FPsave*);
-void fpx87restore0(FPsave*);
-void fpx87save(FPsave*);
-void fpx87save0(FPsave*);
ulong getcr0(void);
ulong getcr2(void);
ulong getcr3(void);
--- a/sys/src/9/pc/fpu.c
+++ b/sys/src/9/pc/fpu.c
@@ -3,6 +3,8 @@
#include "mem.h"
#include "dat.h"
#include "fns.h"
+#include "io.h"
+#include "ureg.h"
enum {
CR4Osfxsr = 1 << 9,
@@ -9,11 +11,286 @@
CR4Oxmmex = 1 << 10,
};
+/* from l.s */
+extern void fpsserestore(FPsave*);
+extern void fpssesave(FPsave*);
+extern void fpx87restore0(FPsave*);
+extern void fpx87save0(FPsave*);
+
void
putxcr0(ulong)
{
}
+/*
+ * we keep FPsave structure in SSE format emulating FXSAVE / FXRSTOR
+ * instructions for legacy x87 fpu.
+ *
+ * fpx87save() calls fpx87save0() (from l.s) on fps and then fills in
+ * the fxsave-style fields of the same structure from the x87-style
+ * ones: the tag word is packed into a tag byte, status, opcode and
+ * the instruction/operand pointers are copied over, every 10 byte
+ * register at regs+10*i is moved to xregs+16*i, the 6 bytes behind
+ * each moved register are zeroed and so are the rsrvd and mxcsr fields.
+ * The registers are moved last-to-first and high-bytes-first, which
+ * is the safe order if regs and xregs overlap (presumably they are
+ * two views of the same storage; confirm against FPsave in dat.h).
+ */
+static void
+fpx87save(FPsave *fps)
+{
+ ushort tag;
+
+ fpx87save0(fps);
+
+ /*
+ * convert x87 tag word to fxsave tag byte:
+ * 00, 01, 10 -> 1, 11 -> 0
+ * after the complement only an empty register (11) reads 00,
+ * so or-ing the two bits of each pair yields the in-use flag;
+ * the remaining steps squeeze the eight flags, which sit on
+ * the even bit positions, into the low byte.
+ */
+ tag = ~fps->tag;	/* 11 (empty) becomes 00 */
+ tag = (tag | (tag >> 1)) & 0x5555;	/* low bit of each pair = register in use */
+ tag = (tag | (tag >> 1)) & 0x3333;	/* two flags per nibble */
+ tag = (tag | (tag >> 2)) & 0x0F0F;	/* four flags per byte */
+ tag = (tag | (tag >> 4)) & 0x00FF;	/* all eight flags in the low byte */
+
+ /* NOP fps->fcw = fps->control; (no copy is done for the control word) */
+ fps->fsw = fps->status;
+ fps->ftw = tag;
+ fps->fop = fps->opcode;
+ fps->fpuip = fps->pc;
+ fps->cs = fps->selector;
+ fps->fpudp = fps->operand;
+ fps->ds = fps->oselector;
+
+#define MOVA(d,s) \
+ *((ushort*)(d+8)) = *((ushort*)(s+8)), \
+ *((ulong*)(d+4)) = *((ulong*)(s+4)), \
+ *((ulong*)(d)) = *((ulong*)(s))
+
+ MOVA(fps->xregs+0x70, fps->regs+70);	/* 10 bytes per register, last register first */
+ MOVA(fps->xregs+0x60, fps->regs+60);
+ MOVA(fps->xregs+0x50, fps->regs+50);
+ MOVA(fps->xregs+0x40, fps->regs+40);
+ MOVA(fps->xregs+0x30, fps->regs+30);
+ MOVA(fps->xregs+0x20, fps->regs+20);
+ MOVA(fps->xregs+0x10, fps->regs+10);
+ MOVA(fps->xregs+0x00, fps->regs+00);
+
+#undef MOVA
+
+#define CLR6(d) \
+ *((ulong*)(d)) = 0, \
+ *((ushort*)(d+4)) = 0
+
+ CLR6(fps->xregs+0x70+10);	/* zero bytes 10-15 of each 16 byte slot */
+ CLR6(fps->xregs+0x60+10);
+ CLR6(fps->xregs+0x50+10);
+ CLR6(fps->xregs+0x40+10);
+ CLR6(fps->xregs+0x30+10);
+ CLR6(fps->xregs+0x20+10);
+ CLR6(fps->xregs+0x10+10);
+ CLR6(fps->xregs+0x00+10);
+
+#undef CLR6
+
+ fps->rsrvd1 = fps->rsrvd2 = fps->mxcsr = fps->mxcsr_mask = 0;	/* fields with no x87-style source are cleared */
+}
+
+/*
+ * Inverse of fpx87save(): rebuild the x87-style fields of fps (tag
+ * word, registers at regs+10*i, status, opcode, pointers) from the
+ * fxsave-style ones and hand the result to fpx87restore0() (l.s).
+ */
+static void
+fpx87restore(FPsave *fps)
+{
+ ushort msk, tos, tag, *reg;
+
+ /*
+ * convert fxsave tag byte to x87 tag word.
+ * bit n of ftw says whether physical register n is in use, while
+ * the 16 byte slots in xregs are indexed relative to the top of
+ * stack held in bits 11-13 of fsw, hence the slot for physical
+ * register 7 is 7 - top and the index drops by one per bit.
+ * reg[0..3] is the 64 bit significand, reg[4] sign and exponent.
+ */
+ tag = 0;
+ tos = 7 - ((fps->fsw >> 11) & 7);
+ for(msk = 0x80; msk != 0; tos--, msk >>= 1){
+ tag <<= 2;
+ if((fps->ftw & msk) != 0){
+ reg = (ushort*)&fps->xregs[(tos & 7) << 4];
+ switch(reg[4] & 0x7fff){
+ case 0x0000:
+ if((reg[0] | reg[1] | reg[2] | reg[3]) == 0){
+ tag |= 1; /* 01 zero */
+ break;
+ }
+ /* no break: zero exponent, non-zero significand (denormal) */
+ case 0x7fff:
+ tag |= 2; /* 10 special */
+ break;
+ default:
+ /*
+ * a finite number is only valid when the explicit
+ * integer bit (bit 63 of the significand) is set;
+ * the previous test was inverted and tagged every
+ * normal number as special.
+ */
+ if((reg[3] & 0x8000) != 0)
+ break; /* 00 valid */
+ tag |= 2; /* 10 special */
+ break;
+ }
+ }else{
+ tag |= 3; /* 11 empty */
+ }
+ }
+
+#define MOVA(d,s) \
+ *((ulong*)(d)) = *((ulong*)(s)), \
+ *((ulong*)(d+4)) = *((ulong*)(s+4)), \
+ *((ushort*)(d+8)) = *((ushort*)(s+8))
+
+ /* first register first, low bytes first: the reverse of the order used when saving */
+ MOVA(fps->regs+00, fps->xregs+0x00);
+ MOVA(fps->regs+10, fps->xregs+0x10);
+ MOVA(fps->regs+20, fps->xregs+0x20);
+ MOVA(fps->regs+30, fps->xregs+0x30);
+ MOVA(fps->regs+40, fps->xregs+0x40);
+ MOVA(fps->regs+50, fps->xregs+0x50);
+ MOVA(fps->regs+60, fps->xregs+0x60);
+ MOVA(fps->regs+70, fps->xregs+0x70);
+
+#undef MOVA
+
+ fps->oselector = fps->ds;
+ fps->operand = fps->fpudp;
+ fps->opcode = fps->fop & 0x7ff;	/* keep the low 11 bits only */
+ fps->selector = fps->cs;
+ fps->pc = fps->fpuip;
+ fps->tag = tag;
+ fps->status = fps->fsw;
+ /* NOP fps->control = fps->fcw; (no copy is done for the control word) */
+
+ fps->r1 = fps->r2 = fps->r3 = fps->r4 = 0;
+
+ fpx87restore0(fps);
+}
+
+static char* mathmsg[] =	/* indexed by status bit number, see mathnote() */
+{
+ nil, /* bit 0: handled below, decoded separately in mathnote() */
+ "denormalized operand",	/* bit 1 */
+ "division by zero",	/* bit 2 */
+ "numeric overflow",	/* bit 3 */
+ "numeric underflow",	/* bit 4 */
+ "precision loss",	/* bit 5 */
+};
+
+static void
+mathnote(ulong status, ulong pc)	/* describe status in a note posted to up */
+{
+ char *msg, note[ERRMAX];
+ int i;
+
+ /*
+ * Some attention should probably be paid here to the
+ * exception masks and error summary.
+ *
+ * The lowest set bit among bits 1-5 of status picks the
+ * message from mathmsg[]; bit 0 takes precedence over it and
+ * is reported as a stack fault when bit 6 (0x40) is set as
+ * well, bit 9 (0x200) then telling overflow from underflow.
+ */
+ msg = "unknown exception";
+ for(i = 1; i <= 5; i++){
+ if(!((1<<i) & status))
+ continue;
+ msg = mathmsg[i];
+ break;
+ }
+ if(status & 0x01){
+ if(status & 0x40){
+ if(status & 0x200)
+ msg = "stack overflow";
+ else
+ msg = "stack underflow";
+ }else
+ msg = "invalid operation";
+ }
+ snprint(note, sizeof note, "sys: fp: %s fppc=0x%lux status=0x%lux",
+ msg, pc, status);
+ postnote(up, 1, note, NDebug);	/* delivered to the current process */
+}
+
+/*
+ * math coprocessor error
+ *
+ * Installed by mathinit() for VectorCERR and, on family 3 cpus,
+ * for IrqIRQ13. Saves the FPU state of up, marks it FPinactive
+ * and posts a note built from the saved status word and fpu
+ * instruction pointer.
+ */
+static void
+matherror(Ureg*, void*)
+{
+ /*
+ * a write cycle to port 0xF0 clears the interrupt latch attached
+ * to the error# line from the 387
+ */
+ if(!(m->cpuiddx & Fpuonchip))	/* only done when cpuid does not report an on-chip fpu */
+ outb(0xF0, 0xFF);
+
+ /*
+ * get floating point state to check out error
+ */
+ fpsave(up->fpsave);
+ up->fpstate = FPinactive;
+ mathnote(up->fpsave->fsw, up->fpsave->fpuip);
+}
+
+/*
+ * SIMD error
+ *
+ * Installed by mathinit() for VectorSIMD. Saves the FPU state of
+ * up, marks it FPinactive and posts a note. Only the low six bits
+ * of the saved mxcsr are passed on, so mathnote() never sees its
+ * stack fault bits, and the pc reported is the trap pc from ureg.
+ */
+static void
+simderror(Ureg *ureg, void*)
+{
+ fpsave(up->fpsave);
+ up->fpstate = FPinactive;
+ mathnote(up->fpsave->mxcsr & 0x3f, ureg->pc);
+}
+
+/*
+ * math coprocessor emulation fault
+ *
+ * Installed by mathinit() for VectorCNA. Acts on up->fpstate:
+ * with FPillegal set a note is posted and nothing else happens;
+ * FPinit initialises the fpu, allocates the save area and goes
+ * FPactive; FPinactive restores the saved state and goes FPactive
+ * unless the saved state has a pending exception, which is posted
+ * as a note instead; FPactive is not expected here and panics.
+ */
+static void
+mathemu(Ureg *ureg, void*)
+{
+ ulong status, control;
+
+ if(up->fpstate & FPillegal){
+ /* someone did floating point in a note handler */
+ postnote(up, 1, "sys: floating point in note handler", NDebug);
+ return;
+ }
+ switch(up->fpstate){
+ case FPinit:
+ fpinit();
+ if(fpsave == fpssesave)	/* the SSE save routine is the one in use */
+ ldmxcsr(0x1f80); /* no simd exceptions on 386 */
+ while(up->fpsave == nil)	/* retried until the allocation succeeds */
+ up->fpsave = mallocalign(sizeof(FPsave), FPalign, 0, 0);
+ up->fpstate = FPactive;
+ break;
+ case FPinactive:
+ /*
+ * Before restoring the state, check for any pending
+ * exceptions, there's no way to restore the state without
+ * generating an unmasked exception.
+ * More attention should probably be paid here to the
+ * exception masks and error summary.
+ */
+ status = up->fpsave->fsw;
+ control = up->fpsave->fcw;
+ if((status & ~control) & 0x07F){	/* a status bit in 0-6 is set whose control bit is clear */
+ mathnote(status, up->fpsave->fpuip);
+ break;	/* state stays FPinactive, nothing is restored */
+ }
+ fprestore(up->fpsave);
+ up->fpstate = FPactive;
+ break;
+ case FPactive:
+ panic("math emu pid %ld %s pc 0x%lux",
+ up->pid, up->text, ureg->pc);
+ break;
+ }
+}
+
+/*
+ * math coprocessor segment overrun
+ *
+ * Installed by mathinit() for VectorCSO; the only reaction is
+ * the pexit() call below.
+ */
+static void
+mathover(Ureg*, void*)
+{
+ pexit("math overrun", 0);
+}
+
+void
+mathinit(void)	/* install the fpu trap and interrupt handlers of this file */
+{
+ trapenable(VectorCERR, matherror, 0, "matherror");
+ if(m->cpuidfamily == 3)	/* family 3 cpus additionally get matherror on IRQ13 */
+ intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror");
+ trapenable(VectorCNA, mathemu, 0, "mathemu");
+ trapenable(VectorCSO, mathover, 0, "mathover");
+ trapenable(VectorSIMD, simderror, 0, "simderror");
+}
+
+/*
+ * fpuinit(), called from cpuidentify() for each cpu.
+ */
void
fpuinit(void)
{
--- a/sys/src/9/pc/main.c
+++ b/sys/src/9/pc/main.c
@@ -234,272 +234,6 @@
}
/*
- * we keep FPsave structure in SSE format emulating FXSAVE / FXRSTOR
- * instructions for legacy x87 fpu.
- */
-void
-fpx87save(FPsave *fps)
-{
- ushort tag;
-
- fpx87save0(fps);
-
- /*
- * convert x87 tag word to fxsave tag byte:
- * 00, 01, 10 -> 1, 11 -> 0
- */
- tag = ~fps->tag;
- tag = (tag | (tag >> 1)) & 0x5555;
- tag = (tag | (tag >> 1)) & 0x3333;
- tag = (tag | (tag >> 2)) & 0x0F0F;
- tag = (tag | (tag >> 4)) & 0x00FF;
-
- /* NOP fps->fcw = fps->control; */
- fps->fsw = fps->status;
- fps->ftw = tag;
- fps->fop = fps->opcode;
- fps->fpuip = fps->pc;
- fps->cs = fps->selector;
- fps->fpudp = fps->operand;
- fps->ds = fps->oselector;
-
-#define MOVA(d,s) \
- *((ushort*)(d+8)) = *((ushort*)(s+8)), \
- *((ulong*)(d+4)) = *((ulong*)(s+4)), \
- *((ulong*)(d)) = *((ulong*)(s))
-
- MOVA(fps->xregs+0x70, fps->regs+70);
- MOVA(fps->xregs+0x60, fps->regs+60);
- MOVA(fps->xregs+0x50, fps->regs+50);
- MOVA(fps->xregs+0x40, fps->regs+40);
- MOVA(fps->xregs+0x30, fps->regs+30);
- MOVA(fps->xregs+0x20, fps->regs+20);
- MOVA(fps->xregs+0x10, fps->regs+10);
- MOVA(fps->xregs+0x00, fps->regs+00);
-
-#undef MOVA
-
-#define CLR6(d) \
- *((ulong*)(d)) = 0, \
- *((ushort*)(d+4)) = 0
-
- CLR6(fps->xregs+0x70+10);
- CLR6(fps->xregs+0x60+10);
- CLR6(fps->xregs+0x50+10);
- CLR6(fps->xregs+0x40+10);
- CLR6(fps->xregs+0x30+10);
- CLR6(fps->xregs+0x20+10);
- CLR6(fps->xregs+0x10+10);
- CLR6(fps->xregs+0x00+10);
-
-#undef CLR6
-
- fps->rsrvd1 = fps->rsrvd2 = fps->mxcsr = fps->mxcsr_mask = 0;
-}
-
-void
-fpx87restore(FPsave *fps)
-{
- ushort msk, tos, tag, *reg;
-
- /* convert fxsave tag byte to x87 tag word */
- tag = 0;
- tos = 7 - ((fps->fsw >> 11) & 7);
- for(msk = 0x80; msk != 0; tos--, msk >>= 1){
- tag <<= 2;
- if((fps->ftw & msk) != 0){
- reg = (ushort*)&fps->xregs[(tos & 7) << 4];
- switch(reg[4] & 0x7fff){
- case 0x0000:
- if((reg[0] | reg[1] | reg[2] | reg[3]) == 0){
- tag |= 1; /* 01 zero */
- break;
- }
- /* no break */
- case 0x7fff:
- tag |= 2; /* 10 special */
- break;
- default:
- if((reg[3] & 0x8000) == 0)
- break; /* 00 valid */
- tag |= 2; /* 10 special */
- break;
- }
- }else{
- tag |= 3; /* 11 empty */
- }
- }
-
-#define MOVA(d,s) \
- *((ulong*)(d)) = *((ulong*)(s)), \
- *((ulong*)(d+4)) = *((ulong*)(s+4)), \
- *((ushort*)(d+8)) = *((ushort*)(s+8))
-
- MOVA(fps->regs+00, fps->xregs+0x00);
- MOVA(fps->regs+10, fps->xregs+0x10);
- MOVA(fps->regs+20, fps->xregs+0x20);
- MOVA(fps->regs+30, fps->xregs+0x30);
- MOVA(fps->regs+40, fps->xregs+0x40);
- MOVA(fps->regs+50, fps->xregs+0x50);
- MOVA(fps->regs+60, fps->xregs+0x60);
- MOVA(fps->regs+70, fps->xregs+0x70);
-
-#undef MOVA
-
- fps->oselector = fps->ds;
- fps->operand = fps->fpudp;
- fps->opcode = fps->fop & 0x7ff;
- fps->selector = fps->cs;
- fps->pc = fps->fpuip;
- fps->tag = tag;
- fps->status = fps->fsw;
- /* NOP fps->control = fps->fcw; */
-
- fps->r1 = fps->r2 = fps->r3 = fps->r4 = 0;
-
- fpx87restore0(fps);
-}
-
-static char* mathmsg[] =
-{
- nil, /* handled below */
- "denormalized operand",
- "division by zero",
- "numeric overflow",
- "numeric underflow",
- "precision loss",
-};
-
-static void
-mathnote(ulong status, ulong pc)
-{
- char *msg, note[ERRMAX];
- int i;
-
- /*
- * Some attention should probably be paid here to the
- * exception masks and error summary.
- */
- msg = "unknown exception";
- for(i = 1; i <= 5; i++){
- if(!((1<<i) & status))
- continue;
- msg = mathmsg[i];
- break;
- }
- if(status & 0x01){
- if(status & 0x40){
- if(status & 0x200)
- msg = "stack overflow";
- else
- msg = "stack underflow";
- }else
- msg = "invalid operation";
- }
- snprint(note, sizeof note, "sys: fp: %s fppc=0x%lux status=0x%lux",
- msg, pc, status);
- postnote(up, 1, note, NDebug);
-}
-
-/*
- * math coprocessor error
- */
-static void
-matherror(Ureg*, void*)
-{
- /*
- * a write cycle to port 0xF0 clears the interrupt latch attached
- * to the error# line from the 387
- */
- if(!(m->cpuiddx & Fpuonchip))
- outb(0xF0, 0xFF);
-
- /*
- * get floating point state to check out error
- */
- fpsave(up->fpsave);
- up->fpstate = FPinactive;
- mathnote(up->fpsave->fsw, up->fpsave->fpuip);
-}
-
-/*
- * SIMD error
- */
-static void
-simderror(Ureg *ureg, void*)
-{
- fpsave(up->fpsave);
- up->fpstate = FPinactive;
- mathnote(up->fpsave->mxcsr & 0x3f, ureg->pc);
-}
-
-/*
- * math coprocessor emulation fault
- */
-static void
-mathemu(Ureg *ureg, void*)
-{
- ulong status, control;
-
- if(up->fpstate & FPillegal){
- /* someone did floating point in a note handler */
- postnote(up, 1, "sys: floating point in note handler", NDebug);
- return;
- }
- switch(up->fpstate){
- case FPinit:
- fpinit();
- if(fpsave == fpssesave)
- ldmxcsr(0x1f80); /* no simd exceptions on 386 */
- while(up->fpsave == nil)
- up->fpsave = mallocalign(sizeof(FPsave), FPalign, 0, 0);
- up->fpstate = FPactive;
- break;
- case FPinactive:
- /*
- * Before restoring the state, check for any pending
- * exceptions, there's no way to restore the state without
- * generating an unmasked exception.
- * More attention should probably be paid here to the
- * exception masks and error summary.
- */
- status = up->fpsave->fsw;
- control = up->fpsave->fcw;
- if((status & ~control) & 0x07F){
- mathnote(status, up->fpsave->fpuip);
- break;
- }
- fprestore(up->fpsave);
- up->fpstate = FPactive;
- break;
- case FPactive:
- panic("math emu pid %ld %s pc 0x%lux",
- up->pid, up->text, ureg->pc);
- break;
- }
-}
-
-/*
- * math coprocessor segment overrun
- */
-static void
-mathover(Ureg*, void*)
-{
- pexit("math overrun", 0);
-}
-
-void
-mathinit(void)
-{
- trapenable(VectorCERR, matherror, 0, "matherror");
- if(m->cpuidfamily == 3)
- intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror");
- trapenable(VectorCNA, mathemu, 0, "mathemu");
- trapenable(VectorCSO, mathover, 0, "mathover");
- trapenable(VectorSIMD, simderror, 0, "simderror");
-}
-
-/*
* set up floating point for a new process
*/
void
--- a/sys/src/9/pc64/fns.h
+++ b/sys/src/9/pc64/fns.h
@@ -39,15 +39,10 @@
void fpinit(void);
void (*fprestore)(FPsave*);
void (*fpsave)(FPsave*);
-void fpsserestore(FPsave*);
-void fpssesave(FPsave*);
-void fpxrestore(FPsave*);
-void fpxrestores(FPsave*);
-void fpxsave(FPsave*);
-void fpxsaveopt(FPsave*);
-void fpxsaves(FPsave*);
-void fpx87restore(FPsave*);
-void fpx87save(FPsave*);
+void fpuprocsetup(Proc*);
+void fpuprocfork(Proc*);
+void fpuprocsave(Proc*);
+void fpuprocrestore(Proc*);
int fpusave(void);
void fpurestore(int);
u64int getcr0(void);
--- a/sys/src/9/pc64/fpu.c
+++ b/sys/src/9/pc64/fpu.c
@@ -3,6 +3,8 @@
#include "mem.h"
#include "dat.h"
#include "fns.h"
+#include "ureg.h"
+#include "io.h"
enum {
CR4Osfxsr = 1 << 9,
@@ -10,7 +12,253 @@
CR4Oxsave = 1 << 18,
};
+/*
+ * SIMD Floating Point.
+ * Assembler support to get at the individual instructions
+ * is in l.s.
+ */
+extern void _clts(void);
+extern void _fldcw(u16int);
+extern void _fnclex(void);
+extern void _fninit(void);
+extern void _fxrstor(void*);
+extern void _fxsave(void*);
+extern void _xrstor(void*);
+extern void _xsave(void*);
+extern void _xsaveopt(void*);
+extern void _fwait(void);
+extern void _ldmxcsr(u32int);
+extern void _stts(void);
+
+/*
+ * not used, AMD64 mandated SSE
+ *
+ * Empty stubs: they exist so that the last branch of fpuinit()
+ * has something to assign to fpsave and fprestore.
+ */
+static void
+fpx87save(FPsave*)
+{
+}
+static void
+fpx87restore(FPsave*)
+{
+}
+
+static void
+fpssesave(FPsave *s)	/* fpsave variant built on _fxsave() */
+{
+ _fxsave(s);
+ _stts();	/* l.s routine; presumably sets CR0.TS so the next fpu use traps - confirm */
+}
+static void
+fpsserestore(FPsave *s)	/* fprestore variant built on _fxrstor() */
+{
+ _clts();	/* l.s routine; presumably clears CR0.TS before the fpu is touched - confirm */
+ _fxrstor(s);
+}
+
+static void
+fpxsave(FPsave *s)	/* fpsave variant built on _xsave() */
+{
+ _xsave(s);
+ _stts();	/* same epilogue as the other save variants */
+}
+static void
+fpxrestore(FPsave *s)	/* fprestore variant built on _xrstor() */
+{
+ _clts();	/* same prologue as the other restore variants */
+ _xrstor(s);
+}
+
+static void
+fpxsaves(FPsave *s)	/* note: same body as fpxsaveopt(), it calls _xsaveopt() */
+{
+ _xsaveopt(s);
+ _stts();
+}
+static void
+fpxrestores(FPsave *s)	/* note: same body as fpxrestore(), it calls _xrstor() */
+{
+ _clts();
+ _xrstor(s);
+}
+
+static void
+fpxsaveopt(FPsave *s)	/* fpsave variant built on _xsaveopt() */
+{
+ _xsaveopt(s);
+ _stts();
+}
+
+static char* mathmsg[] =	/* indexed by status bit number, see mathnote() */
+{
+ nil, /* bit 0: handled below, decoded separately in mathnote() */
+ "denormalized operand",	/* bit 1 */
+ "division by zero",	/* bit 2 */
+ "numeric overflow",	/* bit 3 */
+ "numeric underflow",	/* bit 4 */
+ "precision loss",	/* bit 5 */
+};
+
+static void
+mathnote(ulong status, uintptr pc)	/* describe status in a note posted to up */
+{
+ char *msg, note[ERRMAX];
+ int i;
+
+ /*
+ * Some attention should probably be paid here to the
+ * exception masks and error summary.
+ *
+ * The lowest set bit among bits 1-5 of status picks the
+ * message from mathmsg[]; bit 0 takes precedence over it and
+ * is reported as a stack fault when bit 6 (0x40) is set as
+ * well, bit 9 (0x200) then telling overflow from underflow.
+ */
+ msg = "unknown exception";
+ for(i = 1; i <= 5; i++){
+ if(!((1<<i) & status))
+ continue;
+ msg = mathmsg[i];
+ break;
+ }
+ if(status & 0x01){
+ if(status & 0x40){
+ if(status & 0x200)
+ msg = "stack overflow";
+ else
+ msg = "stack underflow";
+ }else
+ msg = "invalid operation";
+ }
+ snprint(note, sizeof note, "sys: fp: %s fppc=%#p status=0x%lux",
+ msg, pc, status);
+ postnote(up, 1, note, NDebug);	/* delivered to the current process */
+}
+
+/*
+ * math coprocessor error
+ *
+ * Installed by mathinit() for VectorCERR (and IrqIRQ13 on family
+ * 3 cpus). The state goes FPinactive while the FPnouser, FPkernel
+ * and slot index bits of up->fpstate are carried over unchanged.
+ */
+static void
+matherror(Ureg *, void*)
+{
+ /*
+ * Save FPU state to check out the error.
+ */
+ fpsave(up->fpsave);
+ up->fpstate = FPinactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
+ mathnote(up->fpsave->fsw, up->fpsave->rip);
+}
+
+/*
+ * SIMD error
+ *
+ * Installed by mathinit() for VectorSIMD. Same state handling as
+ * matherror(); only the low six bits of the saved mxcsr are
+ * passed to mathnote() and the pc reported is the trap pc.
+ */
+static void
+simderror(Ureg *ureg, void*)
+{
+ fpsave(up->fpsave);
+ up->fpstate = FPinactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
+ mathnote(up->fpsave->mxcsr & 0x3f, ureg->pc);
+}
+
void
+fpinit(void)
+{
+ /*
+ * A process tries to use the FPU for the
+ * first time and generates a 'device not available'
+ * exception.
+ * Turn the FPU on and initialise it for use.
+ * Set the precision and mask the exceptions
+ * we don't care about from the generic Mach value.
+ *
+ * Called from mathemu() below. The _xxx() helpers are the
+ * assembler stubs from l.s declared at the top of this file.
+ */
+ _clts();
+ _fninit();
+ _fwait();
+ _fldcw(0x0232);	/* x87 control word; NOTE(review): by the Intel layout this leaves invalid, zero-divide and overflow unmasked - confirm */
+ _ldmxcsr(0x1900);	/* NOTE(review): by the Intel layout the same three exceptions stay unmasked for SSE - confirm */
+}
+
+/*
+ * math coprocessor emulation fault
+ *
+ * Installed by mathinit() for VectorCNA. The switch looks at
+ * up->fpstate with the FPnouser, FPkernel and slot index bits
+ * masked off. With FPpush set (see fpusave()) and a previous
+ * state that was active or inactive, that state is kept and
+ * the slot index is bumped so that a fresh save area is used;
+ * the FPinit cases then initialise the fpu, pick or allocate
+ * up->fpslot[index] and go FPactive. Plain FPinactive restores
+ * the saved state unless it holds a pending exception, plain
+ * FPactive is not expected here and panics.
+ */
+static void
+mathemu(Ureg *ureg, void*)
+{
+ ulong status, control;
+ int index;
+
+ if(up->fpstate & FPillegal){
+ /* someone did floating point in a note handler */
+ postnote(up, 1, "sys: floating point in note handler", NDebug);
+ return;
+ }
+ switch(up->fpstate & ~(FPnouser|FPkernel|FPindexm)){
+ case FPactive | FPpush:
+ _clts();
+ fpsave(up->fpsave);	/* no break */
+ case FPinactive | FPpush:
+ up->fpstate += FPindex1;	/* next slot; no break */
+ case FPinit | FPpush:
+ case FPinit:
+ fpinit();
+ index = up->fpstate >> FPindexs;
+ if(index < 0 || index > (FPindexm>>FPindexs))
+ panic("fpslot index overflow: %d", index);
+ if(userureg(ureg)){
+ if(index != 0)
+ panic("fpslot index %d != 0 for user", index);
+ } else {
+ if(index == 0)
+ up->fpstate |= FPnouser;	/* slot 0 is being taken by a trap that did not come from user mode */
+ up->fpstate |= FPkernel;
+ }
+ while(up->fpslot[index] == nil)	/* retried until the allocation succeeds */
+ up->fpslot[index] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
+ up->fpsave = up->fpslot[index];
+ up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
+ break;
+ case FPinactive:
+ /*
+ * Before restoring the state, check for any pending
+ * exceptions, there's no way to restore the state without
+ * generating an unmasked exception.
+ * More attention should probably be paid here to the
+ * exception masks and error summary.
+ */
+ status = up->fpsave->fsw;
+ control = up->fpsave->fcw;
+ if((status & ~control) & 0x07F){	/* a status bit in 0-6 is set whose control bit is clear */
+ mathnote(status, up->fpsave->rip);
+ break;	/* state stays inactive, nothing is restored */
+ }
+ fprestore(up->fpsave);
+ up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
+ break;
+ case FPactive:
+ panic("math emu pid %ld %s pc %#p",
+ up->pid, up->text, ureg->pc);
+ break;
+ }
+}
+
+/*
+ * math coprocessor segment overrun
+ *
+ * Installed by mathinit() for VectorCSO; the only reaction is
+ * the pexit() call below.
+ */
+static void
+mathover(Ureg*, void*)
+{
+ pexit("math overrun", 0);
+}
+
+void
+mathinit(void)	/* install the fpu trap and interrupt handlers of this file */
+{
+ trapenable(VectorCERR, matherror, 0, "matherror");
+ if(m->cpuidfamily == 3)	/* family 3 cpus additionally get matherror on IRQ13 */
+ intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror");
+ trapenable(VectorCNA, mathemu, 0, "mathemu");
+ trapenable(VectorCSO, mathover, 0, "mathover");
+ trapenable(VectorSIMD, simderror, 0, "simderror");
+}
+
+/*
+ * fpuinit(), called from cpuidentify() for each cpu.
+ */
+void
fpuinit(void)
{
uintptr cr4;
@@ -42,4 +290,101 @@
fpsave = fpx87save;
fprestore = fpx87restore;
}
+}
+
+void
+fpuprocsetup(Proc *p)	/* fpu part of procsetup(): p starts out in FPinit */
+{
+ p->fpstate = FPinit;
+ _stts();	/* l.s routine; presumably makes the next fpu instruction trap to mathemu() - confirm */
+}
+
+void
+fpuprocfork(Proc *p)	/* fpu part of procfork(): p is the new process, up the forking one */
+{
+ int s;
+
+ /*
+ * save floating point state
+ * If the fpu state of up is live it is saved first; then, for
+ * every state listed below, slot 0 of up is copied into a
+ * (newly allocated if needed) slot 0 of p, which is left
+ * FPinactive. For any other state of up, p is not touched.
+ * The copy always reads up->fpslot[0]; after the fpsave()
+ * above this presumably relies on up->fpsave being slot 0 in
+ * those states - confirm.
+ */
+ s = splhi();
+ switch(up->fpstate & ~FPillegal){
+ case FPactive | FPpush:
+ _clts();	/* no break */
+ case FPactive:
+ fpsave(up->fpsave);
+ up->fpstate = FPinactive | (up->fpstate & FPpush);	/* no break */
+ case FPactive | FPkernel:
+ case FPinactive | FPkernel:
+ case FPinactive | FPpush:
+ case FPinactive:
+ while(p->fpslot[0] == nil)	/* retried until the allocation succeeds */
+ p->fpslot[0] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
+ memmove(p->fpsave = p->fpslot[0], up->fpslot[0], sizeof(FPsave));
+ p->fpstate = FPinactive;
+ }
+ splx(s);
+}
+
+void
+fpuprocsave(Proc *p)	/* fpu part of procsave(): store p's live fpu state, if any */
+{
+ switch(p->fpstate & ~(FPnouser|FPkernel|FPindexm)){
+ case FPactive | FPpush:
+ _clts();	/* no break */
+ case FPactive:
+ if(p->state == Moribund){	/* nothing is saved for a Moribund process */
+ _fnclex();
+ _stts();
+ break;
+ }
+ /*
+ * Fpsave() stores without handling pending
+ * unmasked exceptions. Postnote() can't be called
+ * here as sleep() already has up->rlock, so
+ * the handling of pending exceptions is delayed
+ * until the process runs again and generates an
+ * emulation fault to activate the FPU.
+ */
+ fpsave(p->fpsave);
+ p->fpstate = FPinactive | (p->fpstate & ~FPactive);	/* every other bit of fpstate is kept */
+ break;
+ }
+}
+
+void
+fpuprocrestore(Proc*)	/* fpu part of procrestore(): intentionally empty, nothing is restored here */
+{
+}
+
+
+/*
+ * Fpusave and fpurestore lazily save and restore FPU state across
+ * system calls and the pagefault handler so that we can take
+ * advantage of SSE instructions such as AES-NI in the kernel.
+ *
+ * fpusave() saves nothing by itself: it flags up->fpstate with
+ * FPpush (dropping FPillegal) and returns the previous state,
+ * which the caller has to hand to fpurestore() later on.
+ */
+int
+fpusave(void)
+{
+ int ostate = up->fpstate;
+ if((ostate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
+ _stts();	/* state was live: presumably arms the trap so that mathemu() sees the first fpu use - confirm */
+ up->fpstate = FPpush | (ostate & ~FPillegal);
+ return ostate;
+}
+void
+fpurestore(int ostate)	/* undo fpusave(); ostate is the value fpusave() returned */
+{
+ int astate = up->fpstate;
+ if(astate == (FPpush | (ostate & ~FPillegal))){	/* fpstate is still what fpusave() left behind */
+ if((ostate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
+ _clts();	/* counterpart of the _stts() in fpusave() */
+ } else {
+ if(astate == FPinit) /* don't restore on procexec()/procsetup() */
+ return;
+ if((astate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
+ _stts();
+ up->fpsave = up->fpslot[ostate>>FPindexs];	/* back to the slot the old state used */
+ if(ostate & FPactive)
+ ostate = FPinactive | (ostate & ~FPactive);	/* the old state is resumed as inactive */
+ }
+ up->fpstate = ostate;
}
--- a/sys/src/9/pc64/main.c
+++ b/sys/src/9/pc64/main.c
@@ -293,254 +293,10 @@
rebootjump((uintptr)entry & (ulong)~0xF0000000UL, PADDR(code), size);
}
-/*
- * SIMD Floating Point.
- * Assembler support to get at the individual instructions
- * is in l.s.
- */
-extern void _clts(void);
-extern void _fldcw(u16int);
-extern void _fnclex(void);
-extern void _fninit(void);
-extern void _fxrstor(void*);
-extern void _fxsave(void*);
-extern void _xrstor(void*);
-extern void _xsave(void*);
-extern void _xsaveopt(void*);
-extern void _fwait(void);
-extern void _ldmxcsr(u32int);
-extern void _stts(void);
-
-/*
- * not used, AMD64 mandated SSE
- */
void
-fpx87save(FPsave*)
-{
-}
-void
-fpx87restore(FPsave*)
-{
-}
-
-void
-fpssesave(FPsave *s)
-{
- _fxsave(s);
- _stts();
-}
-void
-fpsserestore(FPsave *s)
-{
- _clts();
- _fxrstor(s);
-}
-
-void
-fpxsave(FPsave *s)
-{
- _xsave(s);
- _stts();
-}
-void
-fpxrestore(FPsave *s)
-{
- _clts();
- _xrstor(s);
-}
-
-void
-fpxsaves(FPsave *s)
-{
- _xsaveopt(s);
- _stts();
-}
-void
-fpxrestores(FPsave *s)
-{
- _clts();
- _xrstor(s);
-}
-
-void
-fpxsaveopt(FPsave *s)
-{
- _xsaveopt(s);
- _stts();
-}
-
-static char* mathmsg[] =
-{
- nil, /* handled below */
- "denormalized operand",
- "division by zero",
- "numeric overflow",
- "numeric underflow",
- "precision loss",
-};
-
-static void
-mathnote(ulong status, uintptr pc)
-{
- char *msg, note[ERRMAX];
- int i;
-
- /*
- * Some attention should probably be paid here to the
- * exception masks and error summary.
- */
- msg = "unknown exception";
- for(i = 1; i <= 5; i++){
- if(!((1<<i) & status))
- continue;
- msg = mathmsg[i];
- break;
- }
- if(status & 0x01){
- if(status & 0x40){
- if(status & 0x200)
- msg = "stack overflow";
- else
- msg = "stack underflow";
- }else
- msg = "invalid operation";
- }
- snprint(note, sizeof note, "sys: fp: %s fppc=%#p status=0x%lux",
- msg, pc, status);
- postnote(up, 1, note, NDebug);
-}
-
-/*
- * math coprocessor error
- */
-static void
-matherror(Ureg *, void*)
-{
- /*
- * Save FPU state to check out the error.
- */
- fpsave(up->fpsave);
- up->fpstate = FPinactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
- mathnote(up->fpsave->fsw, up->fpsave->rip);
-}
-
-/*
- * SIMD error
- */
-static void
-simderror(Ureg *ureg, void*)
-{
- fpsave(up->fpsave);
- up->fpstate = FPinactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
- mathnote(up->fpsave->mxcsr & 0x3f, ureg->pc);
-}
-
-void
-fpinit(void)
-{
- /*
- * A process tries to use the FPU for the
- * first time and generates a 'device not available'
- * exception.
- * Turn the FPU on and initialise it for use.
- * Set the precision and mask the exceptions
- * we don't care about from the generic Mach value.
- */
- _clts();
- _fninit();
- _fwait();
- _fldcw(0x0232);
- _ldmxcsr(0x1900);
-}
-
-/*
- * math coprocessor emulation fault
- */
-static void
-mathemu(Ureg *ureg, void*)
-{
- ulong status, control;
- int index;
-
- if(up->fpstate & FPillegal){
- /* someone did floating point in a note handler */
- postnote(up, 1, "sys: floating point in note handler", NDebug);
- return;
- }
- switch(up->fpstate & ~(FPnouser|FPkernel|FPindexm)){
- case FPactive | FPpush:
- _clts();
- fpsave(up->fpsave);
- case FPinactive | FPpush:
- up->fpstate += FPindex1;
- case FPinit | FPpush:
- case FPinit:
- fpinit();
- index = up->fpstate >> FPindexs;
- if(index < 0 || index > (FPindexm>>FPindexs))
- panic("fpslot index overflow: %d", index);
- if(userureg(ureg)){
- if(index != 0)
- panic("fpslot index %d != 0 for user", index);
- } else {
- if(index == 0)
- up->fpstate |= FPnouser;
- up->fpstate |= FPkernel;
- }
- while(up->fpslot[index] == nil)
- up->fpslot[index] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
- up->fpsave = up->fpslot[index];
- up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
- break;
- case FPinactive:
- /*
- * Before restoring the state, check for any pending
- * exceptions, there's no way to restore the state without
- * generating an unmasked exception.
- * More attention should probably be paid here to the
- * exception masks and error summary.
- */
- status = up->fpsave->fsw;
- control = up->fpsave->fcw;
- if((status & ~control) & 0x07F){
- mathnote(status, up->fpsave->rip);
- break;
- }
- fprestore(up->fpsave);
- up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
- break;
- case FPactive:
- panic("math emu pid %ld %s pc %#p",
- up->pid, up->text, ureg->pc);
- break;
- }
-}
-
-/*
- * math coprocessor segment overrun
- */
-static void
-mathover(Ureg*, void*)
-{
- pexit("math overrun", 0);
-}
-
-void
-mathinit(void)
-{
- trapenable(VectorCERR, matherror, 0, "matherror");
- if(m->cpuidfamily == 3)
- intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror");
- trapenable(VectorCNA, mathemu, 0, "mathemu");
- trapenable(VectorCSO, mathover, 0, "mathover");
- trapenable(VectorSIMD, simderror, 0, "simderror");
-}
-
-void
procsetup(Proc *p)
{
- p->fpstate = FPinit;
- _stts();
+ fpuprocsetup(p);
/* clear debug registers */
memset(p->dr, 0, sizeof(p->dr));
@@ -556,29 +312,10 @@
void
procfork(Proc *p)
{
- int s;
-
p->kentry = up->kentry;
p->pcycles = -p->kentry;
- /* save floating point state */
- s = splhi();
- switch(up->fpstate & ~FPillegal){
- case FPactive | FPpush:
- _clts();
- case FPactive:
- fpsave(up->fpsave);
- up->fpstate = FPinactive | (up->fpstate & FPpush);
- case FPactive | FPkernel:
- case FPinactive | FPkernel:
- case FPinactive | FPpush:
- case FPinactive:
- while(p->fpslot[0] == nil)
- p->fpslot[0] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
- memmove(p->fpsave = p->fpslot[0], up->fpslot[0], sizeof(FPsave));
- p->fpstate = FPinactive;
- }
- splx(s);
+ fpuprocfork(p);
}
void
@@ -594,6 +331,8 @@
if(p->vmx != nil)
vmxprocrestore(p);
+ fpuprocrestore(p);
+
if(p->kp)
return;
@@ -618,27 +357,7 @@
if(p->state == Moribund)
p->dr[7] = 0;
- switch(p->fpstate & ~(FPnouser|FPkernel|FPindexm)){
- case FPactive | FPpush:
- _clts();
- case FPactive:
- if(p->state == Moribund){
- _fnclex();
- _stts();
- break;
- }
- /*
- * Fpsave() stores without handling pending
- * unmasked exeptions. Postnote() can't be called
- * here as sleep() already has up->rlock, so
- * the handling of pending exceptions is delayed
- * until the process runs again and generates an
- * emulation fault to activate the FPU.
- */
- fpsave(p->fpsave);
- p->fpstate = FPinactive | (p->fpstate & ~FPactive);
- break;
- }
+ fpuprocsave(p);
/*
* While this processor is in the scheduler, the process could run
@@ -652,37 +371,4 @@
* especially on VMware, but it turns out not to matter.
*/
mmuflushtlb();
-}
-
-/*
- * Fpusave and fpurestore lazily save and restore FPU state across
- * system calls and the pagefault handler so that we can take
- * advantage of SSE instructions such as AES-NI in the kernel.
- */
-int
-fpusave(void)
-{
- int ostate = up->fpstate;
- if((ostate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
- _stts();
- up->fpstate = FPpush | (ostate & ~FPillegal);
- return ostate;
-}
-void
-fpurestore(int ostate)
-{
- int astate = up->fpstate;
- if(astate == (FPpush | (ostate & ~FPillegal))){
- if((ostate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
- _clts();
- } else {
- if(astate == FPinit) /* don't restore on procexec()/procsetup() */
- return;
- if((astate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
- _stts();
- up->fpsave = up->fpslot[ostate>>FPindexs];
- if(ostate & FPactive)
- ostate = FPinactive | (ostate & ~FPactive);
- }
- up->fpstate = ostate;
}