ref: cb2103879e7e1cb869ed1eb8455c468a756e7ef0
parent: e08cc065177138fe821abb84dc6381fd0400e944
author: cinap_lenrek <[email protected]>
date: Mon Dec 29 11:02:57 EST 2014
zynq: lilu dallas, multicore; implement multiprocessor support.
--- a/sys/src/9/zynq/dat.h
+++ b/sys/src/9/zynq/dat.h
@@ -140,7 +140,6 @@
Proc* readied; /* for runproc */
ulong schedticks; /* next forced context switch */
- int cputype;
ulong delayloop;
/* stats */
--- a/sys/src/9/zynq/fns.h
+++ b/sys/src/9/zynq/fns.h
@@ -9,6 +9,7 @@
void procsave(Proc *);
void procrestore(Proc *);
void idlehands(void);
+void sendevent(void);
void coherence(void);
void procfork(Proc *);
void procsetup(Proc *);
@@ -37,7 +38,9 @@
void links(void);
void* vmap(uintptr, ulong);
void timerinit(void);
+void synccycles(void);
void setpmcr(ulong);
+void setpmcnten(ulong);
void* tmpmap(uintptr);
void tmpunmap(void*);
void flushpg(void *);
--- a/sys/src/9/zynq/intr.c
+++ b/sys/src/9/zynq/intr.c
@@ -44,6 +44,9 @@
mpcore[ICCICR] = 7;
mpcore[ICCBPR] = 3;
mpcore[ICCPMR] = 255;
+
+ if(m->machno != 0)
+ return;
/* disable all irqs and clear any pending interrupts */
for(i = 0; i < NINTR/32; i++){
@@ -66,7 +69,7 @@
panic("intrenable: invalid irq %d", irq);
if(type != LEVEL && type != EDGE)
panic("intrenable: invalid type %d", type);
- if(irqs[irq].f != nil)
+ if(irqs[irq].f != nil && irqs[irq].f != f)
panic("intrenable: handler already assigned");
if(irq >= NPRIVATE){
e = &mpcore[ICDIPTR + (irq >> 2)];
--- a/sys/src/9/zynq/l.s
+++ b/sys/src/9/zynq/l.s
@@ -18,7 +18,7 @@
PUTC('l')
MOVW $SECSZ, R0
- MOVW $(CPU0L1-KZERO), R4
+ MOVW $(MACHL1(0)-KZERO), R4
MOVW $KZERO, R1
ADD R1>>(SECSH-2), R4, R1
MOVW $(L1SEC|L1CACHED|L1KERRW), R2
@@ -42,13 +42,20 @@
BGE _start2
MOVW $(UART_BASE|L2VALID|L2DEVICE|L2KERRW), R0
- MOVW $(VMAPL2 - KZERO), R1
+ MOVW $(VMAPL2-KZERO), R1
MOVW R0, (R1)
-
+
PUTC('n')
+
+ MOVW $(MACH(0)-KZERO), R(Rmach)
+_start3:
+ /* enable MMU permission checking */
+ MOVW $0x55555555, R0
+ MCR 15, 0, R0, C(3), C(0), 0
+
MOVW $0, R0
MCR 15, 0, R0, C(8), C(7), 0
- MOVW $(CPU0L1 - KZERO | TTBATTR), R1
+ ADD $TTBATTR, R4, R1
MCR 15, 0, R1, C(2), C(0), 0
MOVW $0x20c5047b, R1
MOVW $_virt(SB), R2
@@ -59,17 +66,18 @@
TEXT _virt(SB), $-4
DSB
ISB
-
- MOVW $(MACH(0) + MACHSIZE), R13
- MOVW $(MACH(0) + 12), R0
+
+ ADD $KZERO, R(Rmach)
+ MOVW R(Rmach), R13
+ ADD $MACHSIZE, R13
+
+ MOVW R(Rmach), R0
+ ADD $12, R0
BL loadsp(SB)
+
MOVW $vectors(SB), R1
MCR 15, 0, R1, C(12), C(0)
- /* enable MMU permission checking */
- MOVW $0x55555555, R0
- MCR 15, 0, R0, C(3), C(0), 0
-
/* enable maths coprocessors in CPACR but disable them in FPEXC */
MRC 15, 0, R0, C(1), C(0), 2
ORR $(15<<20), R0
@@ -96,14 +104,25 @@
MOVW $(VMAP+0x30), R8
PUTC('9')
+ /* kernel Mach* in TPIDRPRW */
+ MCR 15, 0, R(Rmach), C(13), C(0), 4
+
MOVW $setR12(SB), R12
- MOVW $MACH(0), R(Rmach)
MOVW $0, R(Rup)
+
BL main(SB)
B idlehands(SB)
BL _div(SB) /* hack to load _div */
+TEXT mpbootstrap(SB), $-4 /* entry point for the second cpu; mpinit() publishes its physical address */
+ MOVW $0xE0001030, R8 /* presumably the physical uart register PUTC writes through R8 -- confirm */
+ PUTC('M')
+ PUTC('P')
+ MOVW $(MACH(1)-KZERO), R(Rmach) /* physical address of cpu1's Mach; _virt adds KZERO after the MMU is on */
+ MOVW $(MACHL1(1)-KZERO), R4 /* physical address of cpu1's L1 table; _start3 loads it into the TTB */
+ B _start3 /* join the common startup path shared with cpu0 */
+
TEXT touser(SB), $-4
CPS(CPSID)
@@ -238,6 +257,10 @@
WFE
RET
+TEXT sendevent(SB), $0 /* void sendevent(void): execute the ARM SEV instruction */
+ SEV /* encoded as a WORD in mem.h; per the ARM manual it signals cores waiting in WFE */
+ RET
+
TEXT ttbget(SB), $0
MRC 15, 0, R0, C(2), C(0), 0
BIC $0x7f, R0
@@ -282,6 +305,10 @@
MCR 15, 0, R0, C(9), C(12), 0
RET
+TEXT setpmcnten(SB), $0 /* void setpmcnten(ulong): write the argument to a performance monitor register */
+ MCR 15, 0, R0, C(9), C(12), 1 /* CP15 c9,c12,1 -- PMCNTENSET per the ARMv7 manual; R0 presumably holds the argument */
+ RET
+
TEXT perfticks(SB), $0
MRC 15, 0, R0, C(9), C(13), 0
RET
@@ -453,3 +480,4 @@
DSB
MRC 15, 0, R0, C(7), C(4), 0
RET
+
--- a/sys/src/9/zynq/ltrap.s
+++ b/sys/src/9/zynq/ltrap.s
@@ -39,7 +39,8 @@
SUB $(18*4), R13
MOVM.IA [R0-R14], (R13)
- MOVW $MACH(0), R(Rmach) /* FIXME */
+ /* get Mach* from TPIDRPRW */
+ MRC 15, 0, R(Rmach), C(13), C(0), 4
MOVW 8(R(Rmach)), R(Rup)
MOVW $setR12(SB), R12
@@ -79,7 +80,8 @@
MOVM.DB.S [R0-R14], (R13)
SUB $(15*4), R13
- MOVW $MACH(0), R(Rmach) /* FIXME */
+ /* get Mach* from TPIDRPRW */
+ MRC 15, 0, R(Rmach), C(13), C(0), 4
MOVW 8(R(Rmach)), R(Rup)
MOVW $setR12(SB), R12
--- a/sys/src/9/zynq/main.c
+++ b/sys/src/9/zynq/main.c
@@ -182,9 +182,9 @@
int i;
conf.nmach = 1;
- conf.nproc = 100;
+ conf.nproc = 2000;
conf.ialloc = 16*1024*1024;
- conf.nimage = conf.nproc;
+ conf.nimage = 200;
conf.mem[0].base = PGROUND((ulong)end - KZERO);
conf.mem[0].limit = 1024*1024*1024;
conf.npage = 0;
@@ -347,9 +347,63 @@
}
void
+cpuidprint(void) /* print the calling cpu's number and clock rate */
+{
+ print("\ncpu%d: %dMHz ARM Cortex-A9\n", m->machno, m->cpumhz); /* cpumhz is filled in by timerinit() */
+}
+
+void
+mpinit(void) /* prepare cpu1's Mach and L1 table, then release cpu1 into mpbootstrap */
+{
+ extern void mpbootstrap(void); /* l.s */
+ Mach *m1; /* Mach structure of the second cpu */
+ ulong *v; /* temporary mapping of the page holding the start address word */
+ int i;
+
+ if(getconf("*nomp")) /* configuration variable "*nomp" keeps the system on one cpu */
+ return;
+
+ conf.nmach = 2;
+ conf.copymode = 1;
+
+ m1 = MACHP(1);
+ memset(m1, 0, MACHSIZE);
+ m1->machno = 1;
+ m1->l1.pa = MACHL1(m1->machno)-KZERO; /* physical address of cpu1's L1 table */
+ m1->l1.va = KADDR(m1->l1.pa);
+
+ memset(m1->l1.va, 0, L1SZ); /* start from an empty table, then copy selected entries from this cpu */
+ for(i=0; i<L1X(VMAPSZ); i++) /* L1 entries covering the VMAP region */
+ m1->l1.va[L1X(VMAP)+i] = m->l1.va[L1X(VMAP)+i];
+ for(i=0; i<L1X(-KZERO); i++) /* L1 entries from KZERO to the top of the address space */
+ m1->l1.va[L1X(KZERO)+i] = m->l1.va[L1X(KZERO)+i];
+ coherence();
+ cleandse((uchar*)KZERO, (uchar*)0xFFFFFFFF); /* presumably writes cached kernel data back so cpu1 can see it before its caches are on -- confirm */
+
+ v = tmpmap(0xFFFFF000); /* map the last physical page */
+ v[0xFF0/4] = PADDR(mpbootstrap); /* word at physical 0xFFFFFFF0; presumably where the parked cpu1 reads its start address -- confirm against the Zynq TRM */
+ coherence();
+ cleandse(v, (uchar*)v+BY2PG);
+ tmpunmap(v);
+
+ sendevent(); /* executes SEV (l.s) */
+ synccycles(); /* rendezvous with cpu1, which calls synccycles() from main() */
+}
+
+void
main(void)
{
- active.machs = 1;
+ active.machs |= (1 << m->machno);
+ if(m->machno != 0){
+ mmuinit();
+ intrinit();
+ timerinit();
+ cpuidprint();
+ synccycles();
+ timersinit();
+ schedinit();
+ return;
+ }
uartinit();
mmuinit();
l2init();
@@ -361,6 +415,7 @@
xinit();
printinit();
quotefmtinstall();
+ cpuidprint();
sanity();
todinit();
timersinit();
@@ -376,5 +431,6 @@
swapinit();
screeninit();
userinit();
+ mpinit();
schedinit();
}
--- a/sys/src/9/zynq/mem.h
+++ b/sys/src/9/zynq/mem.h
@@ -40,8 +40,8 @@
#define MACHSIZE 8192
#define MACH(n) (KZERO+(n)*MACHSIZE)
#define MACHP(n) ((Mach *)MACH(n))
-#define CPU0L1 ROUND(MACH(MAXMACH), L1SZ)
-#define VMAPL2 (CPU0L1 + L1SZ)
+#define MACHL1(n) (ROUND(MACH(MAXMACH), L1SZ) + (n)*L1SZ)
+#define VMAPL2 MACHL1(MAXMACH)
#define VMAPL2SZ (L2SZ * (VMAPSZ / SECSZ))
#define TMAPL2(n) (VMAPL2 + VMAPL2SZ + (n) * L2SZ)
#define TMAPL2SZ (MAXMACH * L2SZ)
@@ -80,6 +80,7 @@
#define DSB WORD $0xf57ff04f
#define ISB WORD $0xf57ff06f
#define WFE WORD $0xe320f002
+#define SEV WORD $0xe320f004
#define CPS(m) WORD $(0xf1000000|(m))
#define CPSMODE (1<<17)
#define CPSIE (3<<6|2<<18)
@@ -121,4 +122,4 @@
#define L2WRITE 0
#define L2LOCAL (1<<11)
-#define TTBATTR (1<<6|1<<3)
+#define TTBATTR (1<<6|1<<3|1<<1)
--- a/sys/src/9/zynq/mmu.c
+++ b/sys/src/9/zynq/mmu.c
@@ -12,10 +12,13 @@
{
m->l1.pa = ttbget();
m->l1.va = KADDR(m->l1.pa);
- mpcore = vmap(MPCORE_BASE, 0x2000);
- slcr = vmap(SLCR_BASE, 0x1000);
+ memset((uchar*)TMAPL2(m->machno), 0, TMAPL2SZ);
m->l1.va[L1X(TMAP)] = PADDR(TMAPL2(m->machno)) | L1PT;
incref(&m->l1);
+ if(mpcore != nil)
+ return;
+ mpcore = vmap(MPCORE_BASE, 0x2000);
+ slcr = vmap(SLCR_BASE, 0x1000);
}
void
@@ -38,29 +41,28 @@
int s;
s = splhi();
- if(m->l1free != nil){
- p = m->l1free;
+ p = m->l1free;
+ if(p != nil){
+ m->l1free = p->next;
p->next = nil;
- m->l1free = m->l1free->next;
m->nfree--;
splx(s);
return p;
- }else{
- p = smalloc(sizeof(L1));
- for(;;){
- p->va = mallocalign(L1SZ, L1SZ, 0, 0);
- if(p->va != nil)
- break;
- if(!waserror()){
- resrcwait("no memory for L1 table");
- poperror();
- }
+ }
+ splx(s);
+ p = smalloc(sizeof(L1));
+ for(;;){
+ p->va = mallocalign(L1SZ, L1SZ, 0, 0);
+ if(p->va != nil)
+ break;
+ if(!waserror()){
+ resrcwait("no memory for L1 table");
+ poperror();
}
- memmove(p->va, m->l1.va, L1SZ);
- p->pa = PADDR(p->va);
- splx(s);
- return p;
}
+ p->pa = PADDR(p->va);
+ memmove(p->va, m->l1.va, L1SZ);
+ return p;
}
static void
@@ -89,11 +91,9 @@
p = l1alloc();
s = splhi();
if(up->l1 != nil)
- l1free(p);
- else{
- up->l1 = p;
- l1switch(p, 1);
- }
+ panic("upalloc1: up->l1 != nil");
+ up->l1 = p;
+ l1switch(p, 1);
splx(s);
}
@@ -114,6 +114,7 @@
*t = 0;
l = &p->next;
}
+ proc->l1->va[L1X(TMAP)] = 0;
*l = proc->mmufree;
proc->mmufree = proc->mmuused;
proc->mmuused = 0;
@@ -139,6 +140,7 @@
ulong *e;
ulong *l2;
PTE old;
+ char *ctl;
uintptr l2p;
int s;
@@ -178,10 +180,11 @@
splx(s);
if((old & L2VALID) != 0)
flushpg((void *) va);
- if(pg->cachectl[0] == PG_TXTFLUSH){
+ ctl = &pg->cachectl[m->machno];
+ if(*ctl == PG_TXTFLUSH){
cleandse((void *) va, (void *) (va + BY2PG));
invalise((void *) va, (void *) (va + BY2PG));
- pg->cachectl[0] = PG_NOFLUSH;
+ *ctl = PG_NOFLUSH;
}
}
@@ -188,7 +191,6 @@
void
checkmmu(uintptr, uintptr)
{
- print("checkmmu\n");
}
void
@@ -286,7 +288,7 @@
e = &up->l1->va[L1X(KMAP)];
if((*e & 3) == 0){
if(up->kmaptable != nil)
- panic("kmaptable");
+ panic("kmaptable != nil");
up->kmaptable = newpage(0, 0, 0);
s = splhi();
v = tmpmap(up->kmaptable->pa);
@@ -300,7 +302,7 @@
return (KMap *) KMAP;
}
if(up->kmaptable == nil)
- panic("kmaptable");
+ panic("kmaptable == nil");
e = (ulong *) (KMAP + NKMAP * BY2PG);
for(i = 0; i < NKMAP; i++)
if((e[i] & 3) == 0){
@@ -338,7 +340,6 @@
tmpmap(ulong pa)
{
ulong *u, *ub, *ue;
- void *v;
if(islo())
panic("tmpmap: islow %#p", getcallerpc(&pa));
@@ -349,9 +350,13 @@
for(u = ub; u < ue; u++)
if((*u & 3) == 0){
*u = pa | L2VALID | L2CACHED | L2KERRW;
+
+ assert(m->l1.va[L1X(TMAP)] != 0);
+ if(up != nil && up->l1 != nil)
+ up->l1->va[L1X(TMAP)] = m->l1.va[L1X(TMAP)];
+
coherence();
- v = (void *) ((u - ub) * BY2PG + TMAP);
- return v;
+ return (void *) ((u - ub) * BY2PG + TMAP);
}
panic("tmpmap: full (pa=%#.8lux)", pa);
return nil;
@@ -361,7 +366,7 @@
tmpunmap(void *v)
{
ulong *u;
-
+
if(v >= (void*) KZERO)
return;
if(v < (void*)TMAP || v >= (void*)(TMAP + TMAPSZ))
--- a/sys/src/9/zynq/timer.c
+++ b/sys/src/9/zynq/timer.c
@@ -80,12 +80,39 @@
void
timerinit(void)
{
- int mhz;
-
- mhz = PS_CLK * (slcr[ARM_PLL_CTRL] >> 12 & 0x7f) / (slcr[ARM_CLK_CTRL] >> 8 & 0x3f);
- timerhz = mhz * 500000;
+ m->cpumhz = PS_CLK * (slcr[ARM_PLL_CTRL] >> 12 & 0x7f) / (slcr[ARM_CLK_CTRL] >> 8 & 0x3f);
+ m->cpuhz = m->cpumhz * 1000000;
+ timerhz = m->cpuhz / 2;
mpcore[GTIMERCTL] = TIMERDIV - 1 << 8 | 3;
mpcore[LTIMERCTL] = LTIMERDIV - 1 << 8 | 4;
intrenable(TIMERIRQ, timerirq, nil, EDGE, "clock");
+
+ /* enable and reset cycle counter register */
+ m->cyclefreq = m->cpuhz;
+ setpmcnten((1<<31));
+ coherence();
setpmcr(7);
+}
+
+/*
+ * synchronize all cpus' cycle counter registers
+ */
+void
+synccycles(void) /* every cpu counted in conf.nmach must call this; each spins until all have arrived */
+{
+ static Ref r1, r2; /* r1 counts cpus that have arrived, r2 counts cpus that have reset their counter */
+ int s;
+
+ s = splhi();
+ r2.ref = 0; /* done before incref(&r1), so no cpu can have bumped r2 for this round yet */
+ incref(&r1);
+ while(r1.ref != conf.nmach) /* spin until every cpu has arrived */
+ ;
+ setpmcr(7); /* same value timerinit() writes when it enables and resets the cycle counter */
+ m->cycleshi = MACHP(0)->cycleshi; /* adopt cpu0's value; presumably the high word of the cycle count -- confirm */
+ incref(&r2);
+ while(r2.ref != conf.nmach) /* spin until every cpu has passed the reset */
+ ;
+ r1.ref = 0; /* clear the arrival count for a later call */
+ splx(s);
}
--- a/sys/src/9/zynq/trap.c
+++ b/sys/src/9/zynq/trap.c
@@ -20,7 +20,7 @@
iprint("dumpstack disabled\n");
return;
}
- iprint("dumpstack\n");
+ iprint("cpu%d: dumpstack\n", m->machno);
x = 0;
x += iprint("ktrace /arm/9zynq %.8lux %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp, ureg->r14);
@@ -57,7 +57,7 @@
{
int user, insyscall, read, n;
static char buf[ERRMAX];
-
+
read = (fsr & (1<<11)) == 0;
user = userureg(ureg);
if(!user){
@@ -88,6 +88,8 @@
static void
mathtrap(Ureg *, ulong)
{
+ int s;
+
if((up->fpstate & FPillegal) != 0){
postnote(up, 1, "sys: floating point in note handler", NDebug);
return;
@@ -94,12 +96,16 @@
}
switch(up->fpstate){
case FPinit:
+ s = splhi();
fpinit();
up->fpstate = FPactive;
+ splx(s);
break;
case FPinactive:
+ s = splhi();
fprestore(&up->fpsave);
up->fpstate = FPactive;
+ splx(s);
break;
case FPactive:
postnote(up, 1, "sys: floating point error", NDebug);
@@ -138,6 +144,7 @@
postnote(up, 1, "sys: trap: invalid opcode", NDebug);
break;
}
+ dumpregs(ureg);
panic("invalid opcode at pc=%#.8lux lr=%#.8lux", ureg->pc, ureg->r14);
break;
case PsrMiabt:
@@ -153,7 +160,7 @@
intr(ureg);
break;
default:
- print("unknown trap type %ulx\n", ureg->type);
+ iprint("cpu%d: unknown trap type %ulx\n", m->machno, ureg->type);
}
splhi();
if(user){
@@ -408,9 +415,17 @@
}
void
-dumpregs(Ureg *)
+dumpregs(Ureg *ureg) /* print the registers saved in ureg using iprint */
{
- print("dumpregs\n");
+ iprint("trap: %lux psr %8.8lux type %2.2lux pc %8.8lux link %8.8lux\n",
+ ureg->type, ureg->psr, ureg->type, ureg->pc, ureg->link); /* note: ureg->type fills both the "trap" and "type" fields */
+ iprint("R14 %8.8lux R13 %8.8lux R12 %8.8lux R11 %8.8lux R10 %8.8lux\n",
+ ureg->r14, ureg->r13, ureg->r12, ureg->r11, ureg->r10);
+ iprint("R9 %8.8lux R8 %8.8lux R7 %8.8lux R6 %8.8lux R5 %8.8lux\n",
+ ureg->r9, ureg->r8, ureg->r7, ureg->r6, ureg->r5);
+ iprint("R4 %8.8lux R3 %8.8lux R2 %8.8lux R1 %8.8lux R0 %8.8lux\n",
+ ureg->r4, ureg->r3, ureg->r2, ureg->r1, ureg->r0);
+ iprint("pc %#lux link %#lux\n", ureg->pc, ureg->link); /* repeats pc and link from the first line in %#lux form */
}
void
@@ -476,7 +491,7 @@
cycles(&t);
p->kentry -= t;
p->pcycles += t;
-
+
l1switch(&m->l1, 0);
}