ref: a00b6bdbfa72a1688a866edf5f825720c9874ada
dir: /sys/src/9/teg2/cache-l2-pl310.c/
/*
 * PL310 level 2 cache (non-architectural bag on the side)
 *
 * guaranteed to work incorrectly with default settings; must set Sharovr.
 *
 * clean & invalidate (wbinv) is buggy, so we work around erratum 588369
 * by disabling write-back and cache line-fill before, and restoring after.
 */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "../port/error.h"
#include "arm.h"

/* associativity currently selected in auxctl: 16 if Assoc16way is set, else 8 */
#define NWAYS(l2p)	((l2p)->auxctl & Assoc16way? 16: 8)
/* the controller's registers, located via soc.l2cache */
#define L2P		((L2pl310 *)soc.l2cache)

enum {
	L2size		= 1024 * 1024,	/* according to the tegra 2 manual */
	Wayszgran	= 16 * KiB,	/* granularity of way sizes */
};

typedef struct L2pl310 L2pl310;
typedef struct Pl310op Pl310op;

/* one group of maintenance-operation registers (16 bytes) */
struct Pl310op {
	ulong	pa;		/* operate on the line at this physical address */
	ulong	_pad;
	ulong	indexway;
	ulong	way;		/* bit mask of ways; polled until it reads zero */
};

/*
 * register layout.  each _padN is sized as
 *	(offset of the next register) - (offset just past the previous one)
 * so that the named registers land on the offsets used in the expressions.
 */
struct L2pl310 {
	ulong	id;
	ulong	type;
	uchar	_pad0[0x100 - 0x8];
	ulong	ctl;
	ulong	auxctl;

	uchar	_pad1[0x730 - 0x108];	/* boring regs */
	ulong	sync;
	uchar	_pad2[0x740 - 0x734];
	ulong	r3p0sync;		/* workaround for r3p0 bug */
	uchar	_pad3[0x770 - 0x744];
	Pl310op	inv;			/* inv.indexway doesn't exist */
	uchar	_pad4[0x7b0 - 0x780];
	Pl310op	clean;
	uchar	_pad5[0x7f0 - 0x7c0];
	Pl310op	cleaninv;
	/*
	 * cleaninv starts at 0x7f0 and is 16 bytes long, so it ends at 0x800.
	 * this pad used to be computed from 0x7d0, which pushed filtstart,
	 * filtend and debug 0x30 bytes past the offsets that the next pad
	 * (0xf40 - 0xc08) assumes.
	 */
	uchar	_pad6[0xc00 - 0x800];
	ulong	filtstart;
	ulong	filtend;
	uchar	_pad7[0xf40 - 0xc08];	/* was a second member named _pad6 */
	ulong	debug;
	/* ... */
};

enum {
	/* ctl bits */
	L2enable	= 1,

	/* auxctl bits */
	Ipref		= 1<<29,	/* prefetch enables */
	Dpref		= 1<<28,
	Mbo		= 1<<25,
	Sharovr		= 1<<22, /* shared attribute override (i.e., work right!) */
	Parity		= 1<<21,
	Waycfgshift	= 17,
	Waycfgmask	= (1<<3) - 1,
	Assoc16way	= 1<<16,
	/*
	 * optim'n to 0 cache lines; must be enabled in a9(?!).
	 * set CpAClwr0line on all cpus 1st.
	 */
	Fullline0	= 1<<0,

	/* debug bits */
	Wt		= 1<<1,		/* write-through, not write-back */
	Nolinefill	= 1<<0,

	Basecfg		= Wt | Nolinefill,
};

static Lock l2lock;
static int disallowed;		/* by user: *l2off= in plan9.ini */
static int l2ison;
static int bg_op_running;	/* set while a by-way operation is in progress */
static ulong waysmask;		/* MASK(nways), computed by l2pl310init */

static Cacheimpl l2cacheimpl;

/* spin until no by-way (whole-cache) operation is marked as running */
static void
awaitbgop(void)
{
	while (bg_op_running)
		;
}

/*
 * acquire l2lock with ilock, making sure no by-way operation is in
 * progress both before taking the lock and once it is held.
 */
static void
getlock(void)
{
	awaitbgop();		/* wait at normal PL first */
	ilock(&l2lock);
	awaitbgop();		/* wait under lock */
}

/* write the sync register, then issue a coherence() barrier */
static void
l2pl310sync(void)
{
	L2P->sync = 0;
	coherence();
}

/*
 * call this first to set sets/ways configuration.
 * does nothing but record `disallowed' if *l2off is configured, and
 * returns early if the cache is already on or already configured.
 * otherwise installs l2cacheimpl as l2cache and programs auxctl for
 * 16 ways (if the hardware accepts that) and the matching way size.
 */
void
l2pl310init(void)
{
	int waysz, nways;
	ulong new;
	L2pl310 *l2p = L2P;
	static int configed;

	if (getconf("*l2off") != nil) {
//		iprint("l2 cache (pl310) disabled\n");
		disallowed = 1;
		return;
	}
	if (l2ison || configed)
		return;
	l2cache = &l2cacheimpl;
	cachedwb();

	/*
	 * default config is:
	 * l2: ext unified, 8 ways 512 sets 32 bytes/line => 128KB
	 * but the tegra 2 manual says there's 1MB available.
	 * ways or way-size may be fixed by hardware; the only way to tell
	 * is to try to change the setting and read it back.
	 */
	l2pl310sync();
	l2cache->inv();

	/* figure out number of ways */
	l2pl310sync();
	nways = NWAYS(l2p);
	if (!(l2p->auxctl & Assoc16way)) {
		l2p->auxctl |= Assoc16way;
		coherence();
		l2pl310sync();
		nways = NWAYS(l2p);	/* read back what actually stuck */
//		iprint("\nl2: was set for 8 ways, asked for 16, got %d\n", nways);
	}
	waysmask = MASK(nways);

	/* figure out way size (and thus number of sets) */
	waysz = L2size / nways;
	/* way-size field holds log2(way size in Wayszgran units) + 1 */
	new = (l2p->auxctl & ~(Waycfgmask << Waycfgshift)) |
		((log2(waysz / Wayszgran) + 1) << Waycfgshift);
	l2p->auxctl = new;
	coherence();
	l2pl310sync();
	l2cache->inv();

//	iprint("\nl2: configed %d ways, %d sets (way size %d)\n", nways,
//		waysz / CACHELINESZ, waysz);
	if (l2p->auxctl != new)
		iprint("l2 config %#8.8lux didn't stick; is now %#8.8lux\n",
			new, l2p->auxctl);
	configed++;
}

/*
 * describe the l2 cache in *cp.  *cp is zeroed first and left zeroed
 * if the cache is not on.  ways and sets are derived from the current
 * auxctl contents.
 */
void
l2pl310info(Memcache *cp)
{
	int pow2;
	ulong waysz;
	L2pl310 *l2p = L2P;

	memset(cp, 0, sizeof *cp);
	if (!l2ison)
		return;

	l2pl310init();
	assert((l2p->id >> 24) == 'A');
	cp->level = 2;
	cp->type = Unified;
	cp->external = Extcache;
	cp->setsways = Cara | Cawa | Cawt | Cawb;
	cp->l1ip = 3<<14;			/* PIPT */
	cp->setsh = cp->waysh = 0;		/* bag on the side */

	cp->linelen = CACHELINESZ;
	cp->log2linelen = log2(CACHELINESZ);

	cp->nways = NWAYS(l2p);
	/* invert the encoding written by l2pl310init; clamp a 0 field to 0 */
	pow2 = ((l2p->auxctl >> Waycfgshift) & Waycfgmask) - 1;
	if (pow2 < 0)
		pow2 = 0;
	waysz = (1 << pow2) * Wayszgran;
	cp->nsets = waysz / CACHELINESZ;
}

/*
 * configure and enable the l2 cache.  a no-op (other than recording
 * `disallowed') if *l2off is configured, or if the cache is already on.
 */
void
l2pl310on(void)
{
	ulong ctl;
	L2pl310 *l2p = L2P;

	if (getconf("*l2off") != nil) {
//		iprint("l2 cache (pl310) disabled\n");
		disallowed = 1;
		return;
	}
	if (l2ison)
		return;

	l2pl310init();
	l2cache->inv();

	/*
	 * drain l1.  can't turn it off (which would make locks not work)
	 * because doing so makes references below to the l2 registers wedge
	 * the system.
	 */
	cacheuwbinv();
	cacheiinv();

	/*
	 * this is only called once, on cpu0 at startup,
	 * so we don't need locks here.
	 * must do all configuration before enabling l2 cache.
	 */
	l2p->filtend = 0;
	coherence();
	l2p->filtstart = 0;		/* no enable bit */
	l2p->debug = 0;			/* write-back, line fills allowed */
	coherence();

	ctl = l2p->auxctl;
	/* don't change number of sets & ways, but reset all else. */
	ctl &= Waycfgmask << Waycfgshift | Assoc16way;
	ctl |= Sharovr;		/* actually work correctly for a change */
	ctl |= Mbo | Ipref | Dpref | Parity | Fullline0;
	l2p->auxctl = ctl;
	coherence();

	l2p->ctl |= L2enable;
	coherence();

	l2ison = 1;

//	iprint("l2 cache (pl310) now on\n");
}

/* write back and invalidate the whole cache, then disable it */
void
l2pl310off(void)
{
	if (!l2ison)
		return;
	l2cache->wbinv();
	getlock();
	L2P->ctl &= ~L2enable;
	coherence();
	l2ison = 0;
	iunlock(&l2lock);
}

/*
 * write the physical address of every cache line overlapping
 * [ava, ava+len) to the by-address operation register `reg', then sync.
 * does nothing if the cache is disallowed or off; panics on negative len.
 * every caller in this file holds l2lock.
 */
static void
applyrange(ulong *reg, void *ava, int len)
{
	uintptr va, endva;

	if (disallowed || !l2ison)
		return;
	if (len < 0)
		panic("l2cache*se called with negative length");
	endva = (uintptr)ava + len;
	/* start from the beginning of the line containing ava */
	for (va = (uintptr)ava & ~(CACHELINESZ-1); va < endva;
	     va += CACHELINESZ)
		*reg = PADDR(va);
	l2pl310sync();
}

/* invalidate the lines covering [va, va+bytes) */
void
l2pl310invse(void *va, int bytes)
{
	uintptr start, end;
	L2pl310 *l2p = L2P;

	/*
	 * if start & end addresses are not on cache-line boundaries,
	 * flush first & last cachelines before invalidating.
	 */
	start = (uintptr)va;
	end = start + bytes;
	getlock();
	if (start % CACHELINESZ != 0) {
//		iprint("l2pl310invse: unaligned start %#p from %#p\n", start,
//			getcallerpc(&va));
		applyrange(&l2p->clean.pa, va, 1);
	}
	if (end % CACHELINESZ != 0) {
//		iprint("l2pl310invse: unaligned end %#p from %#p\n", end,
//			getcallerpc(&va));
		applyrange(&l2p->clean.pa, (char *)va + bytes, 1);
	}

	applyrange(&l2p->inv.pa, va, bytes);
	iunlock(&l2lock);
}

/* write back (clean) the lines covering [va, va+bytes) */
void
l2pl310wbse(void *va, int bytes)
{
	getlock();
	applyrange(&L2P->clean.pa, va, bytes);
	iunlock(&l2lock);
}

/*
 * write back and invalidate the lines covering [va, va+bytes).
 *
 * assume that ldrex/strex (thus locks) won't work when Wt is in effect,
 * so don't manipulate locks between setting and clearing Wt.
 */
void
l2pl310wbinvse(void *va, int bytes)
{
	int odb;
	L2pl310 *l2p = L2P;

	if (!l2ison)
		return;
	getlock();
	applyrange(&l2p->clean.pa, va, bytes);	/* paranoia */

	odb = l2p->debug;
	l2p->debug |= Wt | Nolinefill;		/* erratum workaround */
	coherence();

	applyrange(&l2p->cleaninv.pa, va, bytes);

	l2p->debug = odb;			/* restore previous debug bits */
	iunlock(&l2lock);
}

/*
 * we want to wait for completion at normal PL.
 * if waiting is interrupted, interrupt code that calls
 * these ops could deadlock on a uniprocessor, so we only
 * give up l2lock before waiting on multiprocessors.
 * in this port, only cpu 0 gets interrupts other than local timer ones.
 */

/*
 * invalidate the whole cache by way.  unlike wb and wbinv this does
 * not check l2ison: l2pl310init and l2pl310on call it before the cache
 * is enabled.
 */
void
l2pl310inv(void)
{
	L2pl310 *l2p = L2P;

	if (disallowed)
		return;

	getlock();
	bg_op_running = 1;
	l2p->inv.way = waysmask;
	coherence();
	if (conf.nmach > 1)
		iunlock(&l2lock);

	/* the way bits read back as zero once the operation has finished */
	while (l2p->inv.way & waysmask)
		;

	if (conf.nmach > 1)
		ilock(&l2lock);
	l2pl310sync();
	bg_op_running = 0;
	iunlock(&l2lock);
}

/*
 * write back (clean) the whole cache by way.
 * maximum time seen is 2542µs, typical is 625µs.
 */
void
l2pl310wb(void)
{
	L2pl310 *l2p = L2P;

	if (disallowed || !l2ison)
		return;

	getlock();
	bg_op_running = 1;
	l2p->clean.way = waysmask;
	coherence();
	if (conf.nmach > 1)
		iunlock(&l2lock);

	while (l2p->clean.way & waysmask)
		;

	if (conf.nmach > 1)
		ilock(&l2lock);
	l2pl310sync();
	bg_op_running = 0;
	iunlock(&l2lock);
}

/*
 * write back and invalidate the whole cache by way, with the debug
 * bits set around the operation as the erratum workaround.
 *
 * NOTE(review): on multiprocessors l2lock is released and retaken while
 * Wt | Nolinefill are set, which is at odds with the assumption stated
 * above l2pl310wbinvse; confirm that this is intended.
 */
void
l2pl310wbinv(void)
{
	int odb;
	L2pl310 *l2p = L2P;

	if (disallowed || !l2ison)
		return;

	l2pl310wb();			/* paranoia */

	getlock();
	bg_op_running = 1;
	odb = l2p->debug;
	l2p->debug |= Wt | Nolinefill;	/* erratum workaround */
	coherence();
	l2p->cleaninv.way = waysmask;
	coherence();
	if (conf.nmach > 1)
		iunlock(&l2lock);

	while (l2p->cleaninv.way & waysmask)
		;

	if (conf.nmach > 1)
		ilock(&l2lock);
	l2pl310sync();
	l2p->debug = odb;		/* restore previous debug bits */
	bg_op_running = 0;
	iunlock(&l2lock);
}

/* operations table installed as l2cache by l2pl310init */
static Cacheimpl l2cacheimpl = {
	.info	= l2pl310info,
	.on	= l2pl310on,
	.off	= l2pl310off,

	.inv	= l2pl310inv,
	.wb	= l2pl310wb,
	.wbinv	= l2pl310wbinv,

	.invse	= l2pl310invse,
	.wbse	= l2pl310wbse,
	.wbinvse= l2pl310wbinvse,
};