ref: 334c5e1134719a02c35c72ec5435a967a74846f8
parent: 66b6185845e85258f1408271d5f705aacfa6ffdb
author: Sigrid <[email protected]>
date: Sun Dec 6 14:31:56 EST 2020
amd64: FP: always allocate enough to fit the AVX state and align to 64 bytes
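
Background for the change: FXSAVE requires its 512-byte save area to be
16-byte aligned, while XSAVE requires 64-byte alignment, so a single
FPalign of 64 satisfies both instructions and the per-Mach fpalign field
becomes unnecessary; sizing the buffer as sizeof(FPsave) keeps it large
enough for the legacy SSE area plus the XSAVE header and AVX region even
before cpuid reports the exact state size. A minimal hosted-C sketch of
the alignment argument (the enum names below are illustrative only, not
kernel identifiers):

	#include <stdio.h>
	#include <assert.h>

	enum {
		FXsavealign = 16,	/* FXSAVE/FXRSTOR alignment requirement */
		Xsavealign  = 64,	/* XSAVE/XRSTOR alignment requirement */
		FPalign     = 64,	/* single constant added to mem.h */
	};

	int
	main(void)
	{
		/* one alignment constant satisfies both save instructions */
		assert(FPalign % FXsavealign == 0);
		assert(FPalign % Xsavealign == 0);
		printf("FPalign=%d works for FXSAVE and XSAVE\n", FPalign);
		return 0;
	}
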
--- a/sys/src/9/pc64/dat.h
+++ b/sys/src/9/pc64/dat.h
@@ -242,7 +242,6 @@
u64int dr7; /* shadow copy of dr7 */
u64int xcr0;
u32int fpsavesz;
- u32int fpalign;
void* vmx;
--- a/sys/src/9/pc64/fpu.c
+++ b/sys/src/9/pc64/fpu.c
@@ -16,8 +16,7 @@
uintptr cr4;
ulong regs[4];
- m->fpsavesz = sizeof(FPssestate);
- m->fpalign = 16;
+ m->fpsavesz = sizeof(FPsave); /* always enough to fit sse+avx */
if((m->cpuiddx & (Sse|Fxsr)) == (Sse|Fxsr)){ /* have sse fp? */
cr4 = getcr4() | CR4Osfxsr|CR4Oxmmex;
putcr4(cr4);
@@ -34,7 +33,6 @@
cpuid(0xd, 0, regs);
m->fpsavesz = regs[1];
- m->fpalign = 64;
cpuid(0xd, 1, regs);
if(regs[0] & Xsaveopt)
--- a/sys/src/9/pc64/main.c
+++ b/sys/src/9/pc64/main.c
@@ -488,7 +488,7 @@
up->fpstate |= FPkernel;
}
while(up->fpslot[index] == nil)
- up->fpslot[index] = mallocalign(m->fpsavesz, m->fpalign, 0, 0);
+ up->fpslot[index] = mallocalign(m->fpsavesz, FPalign, 0, 0);
up->fpsave = up->fpslot[index];
up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
break;
@@ -574,7 +574,7 @@
case FPinactive | FPpush:
case FPinactive:
while(p->fpslot[0] == nil)
- p->fpslot[0] = mallocalign(m->fpsavesz, m->fpalign, 0, 0);
+ p->fpslot[0] = mallocalign(m->fpsavesz, FPalign, 0, 0);
memmove(p->fpsave = p->fpslot[0], up->fpslot[0], m->fpsavesz);
p->fpstate = FPinactive;
}
--- a/sys/src/9/pc64/mem.h
+++ b/sys/src/9/pc64/mem.h
@@ -26,6 +26,7 @@
#define ROUND(s, sz) (((s)+((sz)-1))&~((sz)-1))
#define PGROUND(s) ROUND(s, BY2PG)
#define BLOCKALIGN 8
+#define FPalign 64
#define MAXMACH 128 /* max # cpus system can run */