ref: b0d226705cec8f36339adce5d95b4feda1deba02
parent: 17f0b2ce38f3c845a3bbbe0630185ca8dac684a4
author: cinap_lenrek <[email protected]>
date: Wed Nov 7 11:48:14 EST 2018
bcm: speed up co-processor operations by avoiding i+d cache flush on each operation coproc.c generated the instructions anew each time, requiring an i+d cache flush for each operation. instead, we can speed this up like this: given that the coprocessor registers are per cpu, we can assume that interrupts have already been disabled by the caller to prevent a process switch to another cpu. we cache the instructions generated in a static append only buffer and maintain separate end pointers for each cpu. the cache flushes only need to be done when new operations have been added to the buffer.
--- a/sys/src/9/bcm/coproc.c
+++ b/sys/src/9/bcm/coproc.c
@@ -1,1 +1,163 @@
-#include "../teg2/coproc.c"
+/*
+ * arm co-processors
+ * mainly to cope with arm hard-wiring register numbers into instructions.
+ *
+ * CP15 (system control) is the one that gets used the most in practice.
+ *
+ * these routines must be callable from KZERO.
+ *
+ * on a multiprocessor, process switching to another cpu is assumed
+ * to be inhibited by the caller as these registers are local to the cpu.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+#include "arm.h"
+
+enum {
+	/* masks applied to the operand fields of a coprocessor instruction */
+	Opmask	= MASK(3),	/* op1 and op2 */
+	Regmask	= MASK(4),	/* cp, crn and crm */
+
+	/*
+	 * return instruction stored after each generated operation.
+	 * alternates: 0xe12fff1e BX (R14); last e is R14
+	 *	0xe28ef000 B 0(R14); second e is R14 (ken)
+	 */
+	Retinst	= 0xe1a0f00e,	/* MOV R14, R15 */
+};
+
+/*
+ * return a pointer to a two-word routine: the instruction wd
+ * followed by Retinst.
+ *
+ * routines live in a static, append-only buffer shared by all cpus,
+ * so each instruction word is generated only once.  ep[MAXMACH] is
+ * the end of the buffer and is only touched while holding lk;
+ * ep[m->machno] is the end as last recorded by this cpu, after its
+ * call to cacheiinv().  entries below it are searched and returned
+ * without taking the lock or touching the caches.
+ *
+ * panics when the buffer is full.
+ */
+static void*
+mkinstr(ulong wd)
+{
+	static ulong ib[256], *ep[MAXMACH+1];
+	static Lock lk;
+	ulong *ip, *ie;
+
+	/* fast path: entries this cpu has already recorded */
+	ie = ep[m->machno];
+	for(ip = ib; ip < ie; ip += 2)
+		if(*ip == wd)
+			return ip;
+
+	/* continue with whatever has been appended since */
+	ilock(&lk);
+	ie = ep[MAXMACH];
+	for(; ip < ie; ip += 2)
+		if(*ip == wd)
+			goto Found;
+	if(ip >= &ib[nelem(ib)])
+		panic("mkinstr: out of instruction buffer");
+
+	/* not generated yet: append it, then cachedwbse() the two new words */
+	ip[0] = wd;
+	ip[1] = Retinst;
+	ep[MAXMACH] = ie = ip + 2;
+	cachedwbse(ip, 2*sizeof(*ip));
+Found:
+	iunlock(&lk);
+	/* the buffer grew past what this cpu had recorded; catch up */
+	cacheiinv();
+	ep[m->machno] = ie;
+	return ip;
+}
+
+
+/*
+ * assemble a coprocessor register transfer from opcode and its
+ * operand fields (op1 and op2 masked with Opmask; crn, crm and cp
+ * with Regmask) and return the routine mkinstr() provides for it.
+ */
+static void*
+setupcpop(ulong opcode, int cp, int op1, int crn, int crm, int op2)
+{
+	ulong wd;
+
+	wd = opcode;
+	wd |= (op1 & Opmask) << 21;
+	wd |= (crn & Regmask) << 16;
+	wd |= (cp & Regmask) << 8;
+	wd |= (op2 & Opmask) << 5;
+	wd |= crm & Regmask;
+	return mkinstr(wd);
+}
+
+/*
+ * read a coprocessor register by calling a generated MRC.
+ * Rt is R0, so the value read comes back as the routine's result.
+ */
+ulong
+cprd(int cp, int op1, int crn, int crm, int op2)
+{
+	ulong (*mrc)(void);
+
+	mrc = setupcpop(0xee100010, cp, op1, crn, crm, op2);
+	return mrc();
+}
+
+/*
+ * write val to a coprocessor register by calling a generated MCR.
+ * Rt is R0, where the routine expects to find val.
+ */
+void
+cpwr(int cp, int op1, int crn, int crm, int op2, ulong val)
+{
+	void (*mcr)(ulong);
+
+	mcr = setupcpop(0xee000010, cp, op1, crn, crm, op2);
+	mcr(val);
+}
+
+/* cprd() on coprocessor CpSC (system control) */
+ulong
+cprdsc(int op1, int crn, int crm, int op2)
+{
+	ulong val;
+
+	val = cprd(CpSC, op1, crn, crm, op2);
+	return val;
+}
+
+/* cpwr() on coprocessor CpSC (system control): write val to the selected register */
+void
+cpwrsc(int op1, int crn, int crm, int op2, ulong val)
+{
+ cpwr(CpSC, op1, crn, crm, op2, val);
+}
+
+/* floating point */
+
+/*
+ * fp coproc control: assemble a transfer between R0 and the fp
+ * control register fpctlreg (reduced with Nfpctlregs - 1) and
+ * return the routine mkinstr() provides for it.
+ */
+static void*
+setupfpctlop(int opcode, int fpctlreg)
+{
+	ulong wd;
+
+	wd = opcode;
+	wd |= (fpctlreg & (Nfpctlregs - 1)) << 16;
+	wd |= 0 << 12;		/* Rt is R0 */
+	wd |= CpFP << 8;
+	return mkinstr(wd);
+}
+
+/*
+ * read fp control register fpreg by calling a generated VMRS.
+ * Rt is R0, so the value read comes back as the routine's result.
+ */
+ulong
+fprd(int fpreg)
+{
+	ulong (*vmrs)(void);
+
+	vmrs = setupfpctlop(0xeef00010, fpreg);
+	return vmrs();
+}
+
+/*
+ * write val to fp control register fpreg by calling a generated
+ * VMSR; Rt is R0.  the fpu might be off and this VMSR might be
+ * what enables it.
+ */
+void
+fpwr(int fpreg, ulong val)
+{
+	void (*vmsr)(ulong);
+
+	vmsr = setupfpctlop(0xeee00010, fpreg);
+	vmsr(val);
+}
+
+/*
+ * fp register access; don't bother with single precision.
+ * assemble opcode for double register fpreg: the low four bits of
+ * the register number go in bits 12-15, its high bit in bit 22.
+ */
+static void*
+setupfpop(int opcode, int fpreg)
+{
+	ulong wd;
+
+	wd = opcode;
+	wd |= 0 << 16;		/* register 0: the pointer arrives in R0 */
+	wd |= (fpreg & (16 - 1)) << 12;
+	if(fpreg >= 16)
+		wd |= 1 << 22;	/* high bit of dfp reg # */
+	return mkinstr(wd);
+}
+
+/*
+ * store double fp register fpreg at *fpp by calling a generated
+ * VSTR.  the pointer is passed in R0, which is convenient; the
+ * result is whatever the generated routine returns.
+ */
+ulong
+fpsavereg(int fpreg, uvlong *fpp)
+{
+	ulong (*vstr)(uvlong *);
+
+	vstr = setupfpop(0xed000000 | CpDFP << 8, fpreg);
+	return vstr(fpp);
+}
+
+/*
+ * load double fp register fpreg with val by calling a generated
+ * VLDR; the routine is handed the address of our copy of val.
+ */
+void
+fprestreg(int fpreg, uvlong val)
+{
+	void (*vldr)(uvlong *);
+
+	vldr = setupfpop(0xed100000 | CpDFP << 8, fpreg);
+	vldr(&val);
+}
--- a/sys/src/9/bcm/vfp3.c
+++ b/sys/src/9/bcm/vfp3.c
@@ -338,8 +338,12 @@
void
fpusysprocsetup(Proc *p)
{
+ int s;
+
+ /*
+  * hold splhi() across fpoff() and restore the saved level with splx().
+  * NOTE(review): presumably needed because fpoff() ends up in the
+  * coprocessor routines, which assume the caller has already disabled
+  * interrupts so the process cannot switch cpus -- confirm.
+  */
+ s = splhi();
p->fpstate = FPinit;
fpoff();
+ splx(s);
}
static void