ref: 81b7451972780743bcd00c963a1c9cb686a3e4ad
parent: 213bf5089365d00d9d40635bcfe62e197d548c1b
author: jpathy <[email protected]>
date: Wed May 22 19:47:05 EDT 2013
sse kernel support (sources)
--- a/sys/src/9/pc/dat.h
+++ b/sys/src/9/pc/dat.h
@@ -2,7 +2,9 @@
typedef struct BIOS32ci BIOS32ci;
typedef struct Conf Conf;
typedef struct Confmem Confmem;
-typedef struct FPsave FPsave;
+typedef union FPsave FPsave;
+typedef struct FPssestate FPssestate;
+typedef struct FPstate FPstate;
typedef struct ISAConf ISAConf;
typedef struct Label Label;
typedef struct Lock Lock;
@@ -64,7 +66,7 @@
FPillegal= 0x100,
};
-struct FPsave
+struct FPstate
{
ushort control;
ushort r1;
@@ -81,6 +83,33 @@
uchar regs[80]; /* floating point registers */
};
+struct FPssestate /* SSE fp state, as written by FXSAVE and read by FXRSTOR */
+{
+ ushort fcw; /* control */
+ ushort fsw; /* status */
+ ushort ftw; /* tag */
+ ushort fop; /* opcode */
+ ulong fpuip; /* pc */
+ ushort cs; /* pc segment */
+ ushort r1; /* reserved */
+ ulong fpudp; /* data pointer */
+ ushort ds; /* data pointer segment */
+ ushort r2; /* presumably reserved, like r1 -- confirm */
+ ulong mxcsr; /* MXCSR register state */
+ ulong mxcsr_mask; /* MXCSR mask register */
+ uchar xregs[480]; /* extended registers */
+ uchar alignpad[FPalign]; /* slack: fpssesave/fpsserestore may shift the state up to an FPalign boundary */
+};
+
+/*
+ * the FP regs must be stored here, not somewhere pointed to from here.
+ * port code assumes this.
+ */
+union FPsave {
+ FPstate; /* x87 layout, used by fpx87save/fpx87restore */
+ FPssestate; /* SSE layout, used by fpssesave/fpsserestore */
+};
+
struct Confmem
{
ulong base;
@@ -227,6 +256,7 @@
uvlong tscticks;
int pdballoc;
int pdbfree;
+ FPsave *fpsavalign;
vlong mtrrcap;
vlong mtrrdef;
@@ -297,6 +327,7 @@
Clflush = 1<<19,
Acpif = 1<<22, /* therm control msr */
Mmx = 1<<23,
+ Fxsr = 1<<24, /* have SSE FXSAVE/FXRSTOR */
Sse = 1<<25, /* thus sfence instr. */
Sse2 = 1<<26, /* thus mfence & lfence instr.s */
Rdrnd = 1<<30, /* RDRAND support bit */
--- a/sys/src/9/pc/devarch.c
+++ b/sys/src/9/pc/devarch.c
@@ -38,6 +38,11 @@
Qmax = 16,
};
+
+enum {
+ CR4Osfxsr = 1 << 9, /* CR4 bit set below when the cpu reports Fxsr */
+};
+
enum { /* cpuid standard function codes */
Highstdfunc = 0, /* also returns vendor string */
Procsig,
@@ -848,6 +853,15 @@
putcr4(cr4);
if(m->cpuiddx & Mce)
rdmsr(0x01, &mct);
+ }
+
+ if(m->cpuiddx & Fxsr){ /* have sse fp? */
+ fpsave = fpssesave;
+ fprestore = fpsserestore;
+ putcr4(getcr4() | CR4Osfxsr);
+ } else {
+ fpsave = fpx87save;
+ fprestore = fpx87restore;
}
cputype = t;
--- a/sys/src/9/pc/fns.h
+++ b/sys/src/9/pc/fns.h
@@ -33,9 +33,15 @@
void fpenv(FPsave*);
void fpinit(void);
void fpoff(void);
-void fprestore(FPsave*);
-void fpsave(FPsave*);
+void (*fprestore)(FPsave*);
+void (*fpsave)(FPsave*);
+void fpsserestore(FPsave*);
+void fpsserestore0(FPsave*);
+void fpssesave(FPsave*);
+void fpssesave0(FPsave*);
ulong fpstatus(void);
+void fpx87restore(FPsave*);
+void fpx87save(FPsave*);
ulong getcr0(void);
ulong getcr2(void);
ulong getcr3(void);
--- a/sys/src/9/pc/l.s
+++ b/sys/src/9/pc/l.s
@@ -657,13 +657,13 @@
WAIT
RET
-TEXT fpsave(SB), $0 /* save state and disable */
+TEXT fpx87save(SB), $0 /* save state and disable */
MOVL p+0(FP), AX
FSAVE 0(AX) /* no WAIT */
FPOFF(l2)
RET
-TEXT fprestore(SB), $0 /* enable and restore state */
+TEXT fpx87restore(SB), $0 /* enable and restore state */
FPON
MOVL p+0(FP), AX
FRSTOR 0(AX)
@@ -683,6 +683,19 @@
FPON
FCLEX /* no WAIT */
FPOFF(l3)
+ RET
+
+TEXT fpssesave0(SB), $0 /* save state and disable */
+ MOVL p+0(FP), AX /* AX = save area; fpssesave passes an FPalign-aligned address */
+ FXSAVE 0(AX) /* no WAIT */
+ FPOFF(l4)
+ RET
+
+TEXT fpsserestore0(SB), $0 /* enable and restore state */
+ FPON
+ MOVL p+0(FP), AX /* AX = saved state; fpsserestore passes an FPalign-aligned address */
+ FXRSTOR 0(AX)
+ WAIT
 RET
/*
--- a/sys/src/9/pc/main.c
+++ b/sys/src/9/pc/main.c
@@ -480,13 +480,36 @@
};
static void
+mathstate(ulong *stsp, ulong *pcp, ulong *ctlp) /* fetch status, fp pc and control from up->fpsave; nil outputs are skipped */
+{
+ ulong sts, fpc, ctl;
+ FPsave *f = &up->fpsave;
+
+ if(fpsave == fpx87save){ /* x87 save routine in use: read the FPstate fields */
+ sts = f->status;
+ fpc = f->pc;
+ ctl = f->control;
+ } else { /* otherwise read the FPssestate fields */
+ sts = f->fsw;
+ fpc = f->fpuip;
+ ctl = f->fcw;
+ }
+ if(stsp)
+ *stsp = sts;
+ if(pcp)
+ *pcp = fpc;
+ if(ctlp)
+ *ctlp = ctl;
+}
+
+static void
mathnote(void)
{
int i;
- ulong status;
+ ulong status, pc;
char *msg, note[ERRMAX];
- status = up->fpsave.status;
+ mathstate(&status, &pc, nil);
/*
* Some attention should probably be paid here to the
@@ -514,11 +537,49 @@
}
/*
+ * sse fp save and restore buffers have to be 16-byte (FPalign) aligned,
+ * so we shuffle the data up and down as needed or make copies.
+ */
+
+void
+fpssesave(FPsave *fps) /* FXSAVE into fps, which need not be FPalign-aligned */
+{
+ FPsave *afps;
+
+ afps = (FPsave *)ROUND(((uintptr)fps), FPalign); /* fps rounded up to an FPalign boundary */
+ fpssesave0(afps);
+ if (fps != afps) /* not aligned? shuffle down from aligned buffer */
+ memmove(fps, afps, sizeof(FPssestate) - FPalign); /* size excludes alignpad */
+}
+
+void
+fpsserestore(FPsave *fps) /* FXRSTOR from fps, which need not be FPalign-aligned */
+{
+ FPsave *afps;
+
+ afps = (FPsave *)ROUND(((uintptr)fps), FPalign); /* fps rounded up to an FPalign boundary */
+ if (fps != afps) {
+ if (m->fpsavalign == nil) /* first unaligned restore: try to get an aligned buffer kept in m */
+ m->fpsavalign = mallocalign(sizeof(FPssestate),
+ FPalign, 0, 0);
+ if (m->fpsavalign) /* otherwise afps stays inside fps, using its alignpad slack */
+ afps = m->fpsavalign;
+ /* copy or shuffle up to make aligned */
+ memmove(afps, fps, sizeof(FPssestate) - FPalign);
+ }
+ fpsserestore0(afps);
+ /* if we couldn't make a copy, shuffle regs back down */
+ if (fps != afps && afps != m->fpsavalign)
+ memmove(fps, afps, sizeof(FPssestate) - FPalign);
+}
+
+/*
* math coprocessor error
*/
static void
matherror(Ureg *ur, void*)
{
+ ulong status, pc;
/*
* a write cycle to port 0xF0 clears the interrupt latch attached
* to the error# line from the 387
@@ -532,9 +593,11 @@
fpenv(&up->fpsave);
mathnote();
- if((ur->pc & 0xf0000000) == KZERO)
+ if((ur->pc & 0xf0000000) == KZERO){
+ mathstate(&status, &pc, nil); /* same layout-aware values that mathnote reads */
- panic("fp: status %ux fppc=0x%lux pc=0x%lux",
- up->fpsave.status, up->fpsave.pc, ur->pc);
+ panic("fp: status %lux fppc=0x%lux pc=0x%lux",
+ status, pc, ur->pc);
+ }
}
/*
@@ -543,6 +606,8 @@
static void
mathemu(Ureg *ureg, void*)
{
+ ulong status, control;
+
if(up->fpstate & FPillegal){
/* someone did floating point in a note handler */
postnote(up, 1, "sys: floating point in note handler", NDebug);
@@ -561,7 +626,8 @@
* More attention should probably be paid here to the
* exception masks and error summary.
*/
- if((up->fpsave.status & ~up->fpsave.control) & 0x07F){
+ mathstate(&status, nil, &control);
+ if((status & ~control) & 0x07F){
mathnote();
break;
}
--- a/sys/src/9/pc/mem.h
+++ b/sys/src/9/pc/mem.h
@@ -19,6 +19,7 @@
#define ROUND(s, sz) (((s)+((sz)-1))&~((sz)-1))
#define PGROUND(s) ROUND(s, BY2PG)
#define BLOCKALIGN 8
+#define FPalign 16
/*
* In 32-bit mode, the MAXMACH limit is 32 without
--- a/sys/src/cmd/8l/optab.c
+++ b/sys/src/cmd/8l/optab.c
@@ -434,12 +434,6 @@
Ymr, Yrl, Zm_r_xm, 1,
0
};
-uchar yxaes[] =
-{
- Yxm, Yxr, Zm_r_xm, 2,
- Yxm, Yxr, Zm_r_i_xm, 2,
- 0
-};
Optab optab[] =
/* as, ytab, andproto, opcode */