ref: f3f93925173d15ca48e90ce1624452d7e3b7726f
parent: 93117262c2e377d9d4f1588924032d1b69e7e2f9
author: cinap_lenrek <[email protected]>
date: Sun Oct 29 19:09:54 EDT 2017
kernel: introduce devswap #¶ to serve /dev/swap and handle swapfile encryption
--- a/lib/namespace
+++ b/lib/namespace
@@ -10,6 +10,7 @@
bind -c #s /srv
bind -q #σ /shr
bind -a #¤ /dev
+bind -qa #¶ /dev
# authentication
mount -b /srv/factotum /mnt
--- a/sys/man/3/cons
+++ b/sys/man/3/cons
@@ -22,7 +22,6 @@
.B /dev/ppid
.B /dev/random
.B /dev/reboot
-.B /dev/swap
.B /dev/sysname
.B /dev/sysstat
.B /dev/time
@@ -239,41 +238,6 @@
Writing anything to
.B sysstat
resets all of the counts on all processors.
-.PP
-The
-.B swap
-device holds a text block giving memory usage statistics:
-.IP
-.EX
-\fIn\fP memory
-\fIn\fP pagesize
-\fIn\fP kernel
-\fIn\fP/\fIm\fP user
-\fIn\fP/\fIm\fP swap
-\fIa\fP/\fIn\fP/\fIm\fP kernel malloc
-\fIa\fP/\fIn\fP/\fIm\fP kernel draw
-.EE
-.PP
-These are total memory (bytes), system page size (bytes),
-kernel memory (pages), user memory (pages), swap space (pages),
-kernel malloced data (bytes), and kernel graphics data (bytes).
-The expression
-.IR n / m
-indicates
-.I n
-used out of
-.I m
-available.
-For kernel malloc and kernel draw,
-.IR a
-indicates the current allocation in bytes.
-These numbers are not blank padded.
-.PP
-To turn on swapping, write to
-.B swap
-the textual file descriptor number of a file or device on which to swap.
-See
-.IR swap (8).
.PP
Reads and writes to
.IR mordor
--- /dev/null
+++ b/sys/man/3/swap
@@ -1,0 +1,46 @@
+.TH SWAP 3
+.SH NAME
+swap \- memory usage statistics and pagefile control
+.SH SYNOPSIS
+.nf
+.B bind -a #¶ /dev
+
+.B /dev/swap
+.fi
+.SH DESCRIPTION
+The
+.B swap
+device holds a text block giving memory usage statistics:
+.IP
+.EX
+\fIn\fP memory
+\fIn\fP pagesize
+\fIn\fP kernel
+\fIn\fP/\fIm\fP user
+\fIn\fP/\fIm\fP swap
+\fIa\fP/\fIn\fP/\fIm\fP kernel malloc
+\fIa\fP/\fIn\fP/\fIm\fP kernel draw
+.EE
+.PP
+These are total memory (bytes), system page size (bytes),
+kernel memory (pages), user memory (pages), swap space (pages),
+kernel malloced data (bytes), and kernel graphics data (bytes).
+The expression
+.IR n / m
+indicates
+.I n
+used out of
+.I m
+available.
+For kernel malloc and kernel draw,
+.IR a
+indicates the current allocation in bytes.
+These numbers are not blank padded.
+.PP
+To turn on swapping, write to
+.B swap
+the textual file descriptor number of a file or device on which to swap.
+.SH SEE ALSO
+.IR swap (8)
+.SH SOURCE
+.B /sys/src/9/port/devswap.c
--- a/sys/man/8/swap
+++ b/sys/man/8/swap
@@ -35,4 +35,5 @@
ctl-message in
.IR proc (3)).
.SH "SEE ALSO"
+.IR swap (3),
.IR proc (3)
--- a/sys/src/9/bcm/main.c
+++ b/sys/src/9/bcm/main.c
@@ -270,7 +270,6 @@
links();
chandevreset(); /* most devices are discovered here */
pageinit();
- swapinit();
userinit();
gpiomeminit();
schedinit();
--- a/sys/src/9/bcm/mkfile
+++ b/sys/src/9/bcm/mkfile
@@ -33,7 +33,6 @@
rdb.$O\
rebootcmd.$O\
segment.$O\
- swap.$O\
syscallfmt.$O\
sysfile.$O\
sysproc.$O\
--- a/sys/src/9/bcm/picpuf
+++ b/sys/src/9/bcm/picpuf
@@ -1,6 +1,7 @@
dev
root
cons
+ swap
env
pipe
proc
--- a/sys/src/9/bcm/pif
+++ b/sys/src/9/bcm/pif
@@ -1,6 +1,7 @@
dev
root
cons
+ swap
env
pipe
proc
--- a/sys/src/9/boot/bootrc
+++ b/sys/src/9/boot/bootrc
@@ -10,7 +10,7 @@
bind -q '#d' /fd
bind -q '#p' /proc
-for(i in S f k æ t b m)
+for(i in ¶ P S f k æ t b m)
bind -qa '#'^$i /dev
# bind in an ip interface
--- a/sys/src/9/kw/main.c
+++ b/sys/src/9/kw/main.c
@@ -322,7 +322,6 @@
chandevreset(); /* most devices are discovered here */
pageinit();
- swapinit();
userinit();
schedinit();
panic("schedinit returned");
--- a/sys/src/9/kw/mkfile
+++ b/sys/src/9/kw/mkfile
@@ -32,7 +32,6 @@
qio.$O\
qlock.$O\
segment.$O\
- swap.$O\
syscallfmt.$O\
sysfile.$O\
sysproc.$O\
--- a/sys/src/9/kw/plug
+++ b/sys/src/9/kw/plug
@@ -3,6 +3,7 @@
dev
root
cons
+ swap
env
pipe
proc
--- a/sys/src/9/mtx/main.c
+++ b/sys/src/9/mtx/main.c
@@ -35,7 +35,6 @@
links();
chandevreset();
pageinit();
- swapinit();
fpsave(&initfp);
initfp.fpscr = 0;
userinit();
--- a/sys/src/9/mtx/mkfile
+++ b/sys/src/9/mtx/mkfile
@@ -30,7 +30,6 @@
qlock.$O\
rdb.$O\
segment.$O\
- swap.$O\
sysfile.$O\
sysproc.$O\
taslock.$O\
--- a/sys/src/9/mtx/mtx
+++ b/sys/src/9/mtx/mtx
@@ -1,6 +1,7 @@
dev
root
cons
+ swap
arch
pnp pci
env
--- a/sys/src/9/mtx/mtxcpu
+++ b/sys/src/9/mtx/mtxcpu
@@ -1,6 +1,7 @@
dev
root
cons
+ swap
arch
pnp pci
env
--- a/sys/src/9/omap/beagle
+++ b/sys/src/9/omap/beagle
@@ -2,6 +2,7 @@
dev
root
cons
+ swap
env
pipe
proc
--- a/sys/src/9/omap/main.c
+++ b/sys/src/9/omap/main.c
@@ -276,7 +276,6 @@
// i8250console(); /* too early; see init0 */
pageinit();
- swapinit();
userinit();
schedinit();
}
--- a/sys/src/9/omap/mkfile
+++ b/sys/src/9/omap/mkfile
@@ -33,7 +33,6 @@
qio.$O\
qlock.$O\
segment.$O\
- swap.$O\
sysfile.$O\
sysproc.$O\
taslock.$O\
--- a/sys/src/9/pc/main.c
+++ b/sys/src/9/pc/main.c
@@ -62,7 +62,6 @@
chandevreset();
netconsole();
pageinit();
- swapinit();
userinit();
schedinit();
}
--- a/sys/src/9/pc/mkfile
+++ b/sys/src/9/pc/mkfile
@@ -35,7 +35,6 @@
rdb.$O\
rebootcmd.$O\
segment.$O\
- swap.$O\
syscallfmt.$O\
sysfile.$O\
sysproc.$O\
--- a/sys/src/9/pc/pc
+++ b/sys/src/9/pc/pc
@@ -2,6 +2,7 @@
dev
root
cons
+ swap
arch
pnp pci
env
--- a/sys/src/9/pc64/main.c
+++ b/sys/src/9/pc64/main.c
@@ -332,7 +332,6 @@
netconsole();
preallocpages();
pageinit();
- swapinit();
userinit();
schedinit();
}
--- a/sys/src/9/pc64/mkfile
+++ b/sys/src/9/pc64/mkfile
@@ -33,7 +33,6 @@
rdb.$O\
rebootcmd.$O\
segment.$O\
- swap.$O\
syscallfmt.$O\
sysfile.$O\
sysproc.$O\
--- a/sys/src/9/pc64/pc64
+++ b/sys/src/9/pc64/pc64
@@ -2,6 +2,7 @@
dev
root
cons
+ swap
arch
pnp pci
env
--- a/sys/src/9/port/devcons.c
+++ b/sys/src/9/port/devcons.c
@@ -5,7 +5,6 @@
#include "fns.h"
#include "../port/error.h"
-#include <pool.h>
#include <authsrv.h>
void (*consdebug)(void) = nil;
@@ -324,7 +323,6 @@
Qppid,
Qrandom,
Qreboot,
- Qswap,
Qsysname,
Qsysstat,
Qtime,
@@ -357,7 +355,6 @@
"ppid", {Qppid}, NUMSIZE, 0444,
"random", {Qrandom}, 0, 0444,
"reboot", {Qreboot}, 0, 0664,
- "swap", {Qswap}, 0, 0664,
"sysname", {Qsysname}, 0, 0664,
"sysstat", {Qsysstat}, 0, 0666,
"time", {Qtime}, NUMSIZE+3*VLNUMSIZE, 0664,
@@ -471,8 +468,6 @@
int i, k, id;
vlong offset = off;
extern char configfile[];
- extern Image fscache;
- extern Image swapimage;
if(n <= 0)
return n;
@@ -592,33 +587,6 @@
poperror();
return n;
- case Qswap:
- snprint(tmp, sizeof tmp,
- "%llud memory\n"
- "%llud pagesize\n"
- "%lud kernel\n"
- "%lud/%lud user\n"
- "%lud/%lud swap\n"
- "%llud/%llud/%llud kernel malloc\n"
- "%llud/%llud/%llud kernel draw\n"
- "%llud/%llud/%llud kernel secret\n",
- (uvlong)conf.npage*BY2PG,
- (uvlong)BY2PG,
- conf.npage-conf.upages,
- palloc.user-palloc.freecount-fscache.pgref-swapimage.pgref, palloc.user,
- conf.nswap-swapalloc.free, conf.nswap,
- (uvlong)mainmem->curalloc,
- (uvlong)mainmem->cursize,
- (uvlong)mainmem->maxsize,
- (uvlong)imagmem->curalloc,
- (uvlong)imagmem->cursize,
- (uvlong)imagmem->maxsize,
- (uvlong)secrmem->curalloc,
- (uvlong)secrmem->cursize,
- (uvlong)secrmem->maxsize);
-
- return readstr((ulong)offset, buf, n, tmp);
-
case Qsysname:
if(sysname == nil)
return 0;
@@ -669,8 +637,7 @@
long l, bp;
char *a;
Mach *mp;
- int id, fd;
- Chan *swc;
+ int id;
ulong offset;
Cmdbuf *cb;
Cmdtab *ct;
@@ -763,25 +730,6 @@
mp->tlbpurge = 0;
}
}
- break;
-
- case Qswap:
- if(n >= sizeof buf)
- error(Egreg);
- memmove(buf, va, n); /* so we can NUL-terminate */
- buf[n] = 0;
- /* start a pager if not already started */
- if(strncmp(buf, "start", 5) == 0){
- kickpager();
- break;
- }
- if(!iseve())
- error(Eperm);
- if(buf[0]<'0' || '9'<buf[0])
- error(Ebadarg);
- fd = strtoul(buf, 0, 0);
- swc = fdtochan(fd, ORDWR, 1, 1);
- setswapchan(swc);
break;
case Qsysname:
--- /dev/null
+++ b/sys/src/9/port/devswap.c
@@ -1,0 +1,612 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include <libsec.h>
+#include <pool.h>
+
+static int canflush(Proc*, Segment*);
+static void executeio(void);
+static void pageout(Proc*, Segment*);
+static void pagepte(int, Page**);
+static void pager(void*);
+
+Image swapimage = {
+ .notext = 1,
+};
+
+static Chan *swapchan; /* backing file or device installed by setswapchan() */
+static uchar *swapbuf; /* page-sized ciphertext buffer, allocated in swapopen() */
+static AESstate *swapkey; /* two AES states passed to aes_xts_encrypt/decrypt, allocated in swapopen() */
+
+static Page **iolist; /* pages queued by pagepte() for executeio(); conf.nswppo slots */
+static int ioptr; /* number of iolist slots in use */
+
+static ushort ageclock; /* advanced by pager() each time it wraps around the process table */
+/* swapinit: listed in swapdevtab; allocates the swap map (one byte per swap page) and the pageout iolist, panics if either fails */
+static void
+swapinit(void)
+{
+ swapalloc.swmap = xalloc(conf.nswap);
+ swapalloc.top = &swapalloc.swmap[conf.nswap];
+ swapalloc.alloc = swapalloc.swmap;
+ swapalloc.last = swapalloc.swmap;
+ swapalloc.free = conf.nswap;
+ swapalloc.xref = 0;
+
+ iolist = xalloc(conf.nswppo*sizeof(Page*));
+ if(swapalloc.swmap == nil || iolist == nil)
+ panic("swapinit: not enough memory");
+}
+/* newswap: take a free swap-map slot (search starts at swapalloc.last, then wraps); returns its byte offset, or ~0 if none is free */
+static uintptr
+newswap(void)
+{
+ uchar *look;
+
+ lock(&swapalloc);
+ if(swapalloc.free == 0) {
+ unlock(&swapalloc);
+ return ~0;
+ }
+ look = memchr(swapalloc.last, 0, swapalloc.top-swapalloc.last);
+ if(look == nil)
+ look = memchr(swapalloc.swmap, 0, swapalloc.last-swapalloc.swmap);
+ *look = 2; /* ref for pte + io transaction */
+ swapalloc.last = look;
+ swapalloc.free--;
+ unlock(&swapalloc);
+ return (look-swapalloc.swmap) * BY2PG;
+}
+/* putswap: drop a reference on the swap slot at address p; slots saturated at 255 are all released together once swapalloc.xref reaches 0 */
+void
+putswap(Page *p)
+{
+ uchar *idx;
+
+ lock(&swapalloc);
+ idx = &swapalloc.swmap[((uintptr)p)/BY2PG];
+ if(*idx == 0)
+ panic("putswap %#p ref == 0", p);
+
+ if(*idx == 255) {
+ if(swapalloc.xref == 0)
+ panic("putswap %#p xref == 0", p);
+
+ if(--swapalloc.xref == 0) {
+ for(idx = swapalloc.swmap; idx < swapalloc.top; idx++) {
+ if(*idx == 255) {
+ *idx = 0;
+ swapalloc.free++;
+ }
+ }
+ }
+ } else {
+ if(--(*idx) == 0)
+ swapalloc.free++;
+ }
+ unlock(&swapalloc);
+}
+/* dupswap: add a reference to the swap slot at address p; once the per-slot byte reaches 255 further references are counted in swapalloc.xref */
+void
+dupswap(Page *p)
+{
+ uchar *idx;
+
+ lock(&swapalloc);
+ idx = &swapalloc.swmap[((uintptr)p)/BY2PG];
+ if(*idx == 255)
+ swapalloc.xref++;
+ else {
+ if(++(*idx) == 255)
+ swapalloc.xref += 255;
+ }
+ unlock(&swapalloc);
+}
+/* swapcount: return the swap-map reference byte for swap address daddr; the swapalloc lock is not taken here */
+int
+swapcount(uintptr daddr)
+{
+ return swapalloc.swmap[daddr/BY2PG];
+}
+/* kickpager: the one caller that raises `started` to 1 creates the pager kproc; every other call wakes swapalloc.r */
+void
+kickpager(void)
+{
+ static Ref started;
+
+ if(started.ref || incref(&started) != 1)
+ wakeup(&swapalloc.r);
+ else
+ kproc("pager", pager, 0);
+}
+/* reclaim: per pass, reclaim from fscache, else swapimage, else imagereclaim; returns 1 once needpages() is false, 0 if a pass freed nothing */
+static int
+reclaim(void)
+{
+ ulong np;
+
+ for(;;){
+ if((np = pagereclaim(&fscache, 1000)) > 0) {
+ if(0) print("reclaim: %lud fscache\n", np);
+ } else if((np = pagereclaim(&swapimage, 1000)) > 0) {
+ if(0) print("reclaim: %lud swap\n", np);
+ } else if((np = imagereclaim(1000)) > 0) {
+ if(0) print("reclaim: %lud image\n", np);
+ }
+ if(!needpages(nil))
+ return 1; /* have pages, done */
+ if(np == 0)
+ return 0; /* didnt reclaim, need to swap */
+ sched();
+ }
+}
+/* pager: kproc body started by kickpager(); reclaims cached pages, sleeps while none are needed, else pages out process segments and runs the queued I/O */
+static void
+pager(void*)
+{
+ int i;
+ Segment *s;
+ Proc *p, *ep;
+
+ p = proctab(0);
+ ep = &p[conf.nproc];
+
+ while(waserror())
+ ; /* after an error is raised below, re-arm the handler and continue with the loop */
+
+ for(;;){
+ up->psstate = "Reclaim";
+ if(reclaim()){
+ up->psstate = "Idle";
+ wakeup(&palloc.pwait[0]);
+ wakeup(&palloc.pwait[1]);
+ sleep(&swapalloc.r, needpages, nil);
+ continue;
+ }
+
+ if(swapimage.c == nil || swapalloc.free == 0){ /* no swap file, or swap is full */
+ Killbig:
+ if(!freebroken())
+ killbig("out of memory");
+ sched();
+ continue;
+ }
+
+ i = ageclock;
+ do {
+ if(++p >= ep){
+ if(++ageclock == i) /* ageclock wrapped all the way around without finding a candidate */
+ goto Killbig;
+ p = proctab(0);
+ }
+ } while(p->state == Dead || p->noswap || !canqlock(&p->seglock));
+ up->psstate = "Pageout";
+ for(i = 0; i < NSEG; i++) {
+ if((s = p->seg[i]) != nil) {
+ switch(s->type&SG_TYPE) {
+ default:
+ break;
+ case SG_TEXT:
+ pageout(p, s);
+ break;
+ case SG_DATA:
+ case SG_BSS:
+ case SG_STACK:
+ case SG_SHARED:
+ pageout(p, s);
+ break;
+ }
+ }
+ }
+ qunlock(&p->seglock);
+
+ if(ioptr > 0) {
+ up->psstate = "I/O";
+ executeio();
+ }
+ }
+}
+/* pageout: try-lock s and, if canflush() allows, hand every resident page not referenced for at least 16 ageclock ticks to pagepte() */
+static void
+pageout(Proc *p, Segment *s)
+{
+ int type, i, size;
+ short age;
+ Pte *l;
+ Page **pg, *entry;
+
+ if(!canqlock(s)) /* We cannot afford to wait, we will surely deadlock */
+ return;
+
+ if(!canflush(p, s) /* Able to invalidate all tlbs with references */
+ || waserror()) {
+ qunlock(s);
+ putseg(s); /* drops the reference canflush() took with incref(s) */
+ return;
+ }
+
+ /* Pass through the pte tables looking for memory pages to swap out */
+ type = s->type&SG_TYPE;
+ size = s->mapsize;
+ for(i = 0; i < size; i++) {
+ l = s->map[i];
+ if(l == nil)
+ continue;
+ for(pg = l->first; pg <= l->last; pg++) {
+ entry = *pg;
+ if(pagedout(entry))
+ continue;
+ if(entry->modref & PG_REF) { /* referenced since last pass: clear the bit and restart its age */
+ entry->modref &= ~PG_REF;
+ entry->refage = ageclock;
+ continue;
+ }
+ age = (short)(ageclock - entry->refage);
+ if(age < 16)
+ continue;
+ pagepte(type, pg);
+ }
+ }
+ poperror();
+ qunlock(s);
+ putseg(s);
+}
+/* canflush: takes a reference on s, then returns canpage(p) if the count is now 2, else 0 unless canpage() succeeds for every live process using s */
+static int
+canflush(Proc *p, Segment *s)
+{
+ int i;
+ Proc *ep;
+
+ if(incref(s) == 2) /* Easy if we are the only user */
+ return canpage(p);
+
+ /* Now we must do hardwork to ensure all processes which have tlb
+ * entries for this segment will be flushed if we succeed in paging it out
+ */
+ p = proctab(0);
+ ep = &p[conf.nproc];
+ while(p < ep) {
+ if(p->state != Dead) {
+ for(i = 0; i < NSEG; i++)
+ if(p->seg[i] == s)
+ if(!canpage(p))
+ return 0;
+ }
+ p++;
+ }
+ return 1;
+}
+/* pagepte: evict the page at *pg; text pages are just released, other segment types get a swap address and are queued on iolist for executeio() */
+static void
+pagepte(int type, Page **pg)
+{
+ uintptr daddr;
+ Page *outp;
+
+ outp = *pg;
+ switch(type) {
+ case SG_TEXT: /* Revert to demand load */
+ putpage(outp);
+ *pg = nil;
+ break;
+
+ case SG_DATA:
+ case SG_BSS:
+ case SG_STACK:
+ case SG_SHARED:
+ if(ioptr >= conf.nswppo) /* iolist is full; leave the page resident */
+ break;
+
+ /*
+ * get a new swap address with swapcount 2, one for the pte
+ * and one extra ref for us while we write the page to disk
+ */
+ daddr = newswap();
+ if(daddr == ~0)
+ break;
+
+ /* clear any pages referring to it from the cache */
+ cachedel(&swapimage, daddr);
+
+ /* forget anything that it used to cache */
+ uncachepage(outp);
+
+ /*
+ * enter it into the cache so that a fault happening
+ * during the write will grab the page from the cache
+ * rather than one partially written to the disk
+ */
+ outp->daddr = daddr;
+ cachepage(outp, &swapimage);
+ *pg = (Page*)(daddr|PG_ONSWAP);
+
+ /* Add page to IO transaction list */
+ iolist[ioptr++] = outp;
+ break;
+ }
+}
+/* pagersummary: print used/total user memory, used/total swap and the number of queued iolist entries */
+void
+pagersummary(void)
+{
+ print("%lud/%lud memory %lud/%lud swap %d iolist\n",
+ palloc.user-palloc.freecount,
+ palloc.user, conf.nswap-swapalloc.free, conf.nswap,
+ ioptr);
+}
+/* executeio: write each page queued on iolist to swapimage.c at its daddr, then drop the extra swap ref and the page; panics on a write error */
+static void
+executeio(void)
+{
+ Page *outp;
+ int i, n;
+ Chan *c;
+ char *kaddr;
+ KMap *k;
+
+ c = swapimage.c;
+ for(i = 0; i < ioptr; i++) {
+ if(ioptr > conf.nswppo)
+ panic("executeio: ioptr %d > %d", ioptr, conf.nswppo);
+ outp = iolist[i];
+
+ assert(outp->ref > 0);
+ assert(outp->image == &swapimage);
+ assert(outp->daddr != ~0);
+
+ /* only write when swap address still in use */
+ if(swapcount(outp->daddr) > 1){
+ k = kmap(outp);
+ kaddr = (char*)VA(k);
+
+ if(waserror())
+ panic("executeio: page outp I/O error");
+
+ n = devtab[c->type]->write(c, kaddr, BY2PG, outp->daddr);
+ if(n != BY2PG) /* a short write is treated like an error and ends in the panic above */
+ nexterror();
+
+ kunmap(k);
+ poperror();
+ }
+
+ /* drop our extra swap reference */
+ putswap((Page*)outp->daddr);
+
+ /* Free up the page after I/O */
+ putpage(outp);
+ }
+ ioptr = 0;
+}
+/* needpages: true while palloc.freecount is below swapalloc.headroom; also the sleep condition used by pager() */
+int
+needpages(void*)
+{
+ return palloc.freecount < swapalloc.headroom;
+}
+/* setswapchan: make c the swap backing store (Einuse if the current one still holds pages), then open #¶/swapfile as swapimage.c */
+static void
+setswapchan(Chan *c)
+{
+ uchar buf[sizeof(Dir)+100];
+ Dir d;
+ int n;
+
+ if(waserror()){
+ cclose(c); /* c is released on any error raised before poperror() below */
+ nexterror();
+ }
+ if(swapimage.c != nil) {
+ if(swapalloc.free != conf.nswap)
+ error(Einuse);
+ cclose(swapimage.c);
+ swapimage.c = nil;
+ }
+
+ /*
+ * if this isn't a file, set the swap space
+ * to be at most the size of the partition
+ */
+ if(devtab[c->type]->dc != L'M'){
+ n = devtab[c->type]->stat(c, buf, sizeof buf);
+ if(n <= 0 || convM2D(buf, n, &d, nil) == 0)
+ error("stat failed in setswapchan");
+ if(d.length < conf.nswppo*BY2PG)
+ error("swap device too small");
+ if(d.length < conf.nswap*BY2PG){
+ conf.nswap = d.length/BY2PG;
+ swapalloc.top = &swapalloc.swmap[conf.nswap];
+ swapalloc.free = conf.nswap;
+ }
+ }
+ c->flag &= ~CCACHE;
+ cclunk(c);
+ poperror();
+
+ swapchan = c; /* must be set before the open below: swapopen() fails with Egreg while swapchan is nil */
+ swapimage.c = namec("#¶/swapfile", Aopen, ORDWR, 0);
+}
+/* qid paths of the files served by this device */
+enum {
+ Qdir,
+ Qswap, /* statistics and control file */
+ Qswapfile, /* encrypting pass-through to swapchan */
+};
+/* directory table handed to the generic dev* helpers; swapfile is mode 0600 and swapopen() additionally requires eve */
+static Dirtab swapdir[]={
+ ".", {Qdir, 0, QTDIR}, 0, DMDIR|0555,
+ "swap", {Qswap}, 0, 0664,
+ "swapfile", {Qswapfile}, 0, 0600,
+};
+/* swapattach: attach to the device through devattach() using the device character ¶ */
+static Chan*
+swapattach(char *spec)
+{
+ return devattach(L'¶', spec);
+}
+/* swapwalk: walk within swapdir using the generic devwalk()/devgen */
+static Walkqid*
+swapwalk(Chan *c, Chan *nc, char **name, int nname)
+{
+ return devwalk(c, nc, name, nname, swapdir, nelem(swapdir), devgen);
+}
+/* swapstat: stat an entry of swapdir using the generic devstat()/devgen */
+static int
+swapstat(Chan *c, uchar *dp, int n)
+{
+ return devstat(c, dp, n, swapdir, nelem(swapdir), devgen);
+}
+/* swapopen: opening swapfile (eve, ORDWR, not already in use, backing channel set) allocates the page buffer and two fresh random AES keys; other files use devopen() */
+static Chan*
+swapopen(Chan *c, int omode)
+{
+ uchar key[128/8];
+
+ switch((ulong)c->qid.path){
+ case Qswapfile:
+ if(!iseve() || omode != ORDWR)
+ error(Eperm);
+ if(swapimage.c != nil)
+ error(Einuse);
+ if(swapchan == nil)
+ error(Egreg);
+
+ c->mode = openmode(omode);
+ c->flag |= COPEN; /* NOTE(review): set before allocating, presumably so swapclose() cleans up if error() is raised below - confirm */
+ c->offset = 0;
+
+ swapbuf = mallocalign(BY2PG, BY2PG, 0, 0);
+ swapkey = secalloc(sizeof(AESstate)*2);
+ if(swapbuf == nil || swapkey == nil)
+ error(Enomem);
+
+ genrandom(key, sizeof(key));
+ setupAESstate(&swapkey[0], key, sizeof(key), nil);
+ genrandom(key, sizeof(key));
+ setupAESstate(&swapkey[1], key, sizeof(key), nil);
+ memset(key, 0, sizeof(key)); /* wipe the raw key bytes from the stack */
+
+ return c;
+ }
+ return devopen(c, omode, swapdir, nelem(swapdir), devgen);
+}
+/* swapclose: closing an opened swapfile channel closes the backing channel and frees the AES key state and the page buffer */
+static void
+swapclose(Chan *c)
+{
+ if((c->flag & COPEN) == 0)
+ return;
+ switch((ulong)c->qid.path){
+ case Qswapfile:
+ cclose(swapchan);
+ swapchan = nil;
+ secfree(swapkey);
+ swapkey = nil;
+ free(swapbuf);
+ swapbuf = nil;
+ break;
+ }
+}
+/* swapread: directory listing, the memory statistics text for swap, or exactly one page read from swapchan and decrypted for swapfile */
+static long
+swapread(Chan *c, void *va, long n, vlong off)
+{
+ char tmp[256]; /* must be >= 18*NUMSIZE (Qswap) */
+
+ switch((ulong)c->qid.path){
+ case Qdir:
+ return devdirread(c, va, n, swapdir, nelem(swapdir), devgen);
+ case Qswap:
+ snprint(tmp, sizeof tmp,
+ "%llud memory\n"
+ "%llud pagesize\n"
+ "%lud kernel\n"
+ "%lud/%lud user\n"
+ "%lud/%lud swap\n"
+ "%llud/%llud/%llud kernel malloc\n"
+ "%llud/%llud/%llud kernel draw\n"
+ "%llud/%llud/%llud kernel secret\n",
+ (uvlong)conf.npage*BY2PG,
+ (uvlong)BY2PG,
+ conf.npage-conf.upages,
+ palloc.user-palloc.freecount-fscache.pgref-swapimage.pgref, palloc.user,
+ conf.nswap-swapalloc.free, conf.nswap,
+ (uvlong)mainmem->curalloc,
+ (uvlong)mainmem->cursize,
+ (uvlong)mainmem->maxsize,
+ (uvlong)imagmem->curalloc,
+ (uvlong)imagmem->cursize,
+ (uvlong)imagmem->maxsize,
+ (uvlong)secrmem->curalloc,
+ (uvlong)secrmem->cursize,
+ (uvlong)secrmem->maxsize);
+ return readstr((ulong)off, va, n, tmp);
+ case Qswapfile:
+ if(n != BY2PG) /* only whole-page transfers are accepted */
+ error(Ebadarg);
+ if(devtab[swapchan->type]->read(swapchan, va, n, off) != n)
+ error(Eio);
+ aes_xts_decrypt(&swapkey[0], &swapkey[1], off, va, va, n); /* decrypt in place: va is both input and output */
+ return n;
+ }
+ error(Egreg);
+ return 0;
+}
+/* swapwrite: swap takes "start" (kick the pager) or an fd number to swap on, eve only; swapfile encrypts one page into swapbuf and writes it at off */
+static long
+swapwrite(Chan *c, void *va, long n, vlong off)
+{
+	char buf[256];
+
+	switch((ulong)c->qid.path){
+	case Qswap:
+		if(!iseve())
+			error(Eperm);
+		if(n >= sizeof buf)
+			error(Egreg);
+		memmove(buf, va, n);	/* so we can NUL-terminate */
+		buf[n] = 0;
+		/* start a pager if not already started */
+		if(strncmp(buf, "start", 5) == 0)
+			kickpager();
+		else if(buf[0]>='0' && buf[0]<='9')	/* fd number: first byte must be a digit, both bounds on buf[0] */
+			setswapchan(fdtochan(strtoul(buf, nil, 0), ORDWR, 1, 1));
+		else
+			error(Ebadctl);
+		return n;
+	case Qswapfile:
+		if(n != BY2PG)	/* only whole-page transfers are accepted */
+			error(Ebadarg);
+		aes_xts_encrypt(&swapkey[0], &swapkey[1], off, va, swapbuf, n);	/* ciphertext goes to swapbuf; va is left untouched */
+		if(devtab[swapchan->type]->write(swapchan, swapbuf, n, off) != n)
+			error(Eio);
+		return n;
+	}
+	error(Egreg);
+	return 0;
+}
+/* device table entry for #¶ ("swap"); slots without a swap-specific routine use the generic dev* functions */
+Dev swapdevtab = {
+ L'¶',
+ "swap",
+ devreset,
+ swapinit,
+ devshutdown,
+ swapattach,
+ swapwalk,
+ swapstat,
+ swapopen,
+ devcreate,
+ swapclose,
+ swapread,
+ devbread,
+ swapwrite,
+ devbwrite,
+ devremove,
+ devwstat,
+};
--- a/sys/src/9/port/portfns.h
+++ b/sys/src/9/port/portfns.h
@@ -318,7 +318,6 @@
void setmalloctag(void*, uintptr);
void setrealloctag(void*, uintptr);
void setregisters(Ureg*, char*, char*, int);
-void setswapchan(Chan*);
void setupwatchpts(Proc*, Watchpt*, int);
char* skipslash(char*);
void sleep(Rendez*, int(*)(void*), void*);
@@ -332,7 +331,6 @@
void shrrenameuser(char*, char*);
int swapcount(uintptr);
int swapfull(void);
-void swapinit(void);
void syscallfmt(ulong syscallno, uintptr pc, va_list list);
void sysretfmt(ulong syscallno, va_list list, uintptr ret, uvlong start, uvlong stop);
void timeradd(Timer*);
--- a/sys/src/9/port/portmkfile
+++ b/sys/src/9/port/portmkfile
@@ -62,15 +62,15 @@
%.db: main.$O
$CC -s$stem main.c | dbfmt > $stem.db
-alloc.$O: /sys/include/pool.h
+alloc.$O devswap.$O: /sys/include/pool.h
devmnt.$O: /sys/include/fcall.h
proc.$O proc.acid: errstr.h
devroot.$O: errstr.h
devaudio.$O: ../port/audioif.h
-devaoe.$O: /$objtype/include/ureg.h
-devfs.$O: /$objtype/include/ureg.h
-devsd.$O: /$objtype/include/ureg.h
-sdscsi.$O: /$objtype/include/ureg.h
+devaoe.$O: ../port/sd.h /$objtype/include/ureg.h
+devfs.$O: ../port/sd.h /$objtype/include/ureg.h
+devsd.$O: ../port/sd.h /$objtype/include/ureg.h
+sdscsi.$O: ../port/sd.h /$objtype/include/ureg.h
trap.$O: /$objtype/include/ureg.h
devproc.$O: /$objtype/include/ureg.h
main.$O: init.h
@@ -87,3 +87,5 @@
devsdp.$O: ../port/thwack.h
devproc.$O sysproc.$O: /sys/include/tos.h
devproc.$O edf.$O proc.$O: /sys/include/trace.h
+devcons.$O: /sys/include/authsrv.h
+devcap.$O devfs.$O devsdp.$O devssl.$O devtls.$O devswap.$O random.$O: /sys/include/libsec.h
--- a/sys/src/9/port/swap.c
+++ /dev/null
@@ -1,430 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-
-static int canflush(Proc*, Segment*);
-static void executeio(void);
-static void pageout(Proc*, Segment*);
-static void pagepte(int, Page**);
-static void pager(void*);
-
-Image swapimage;
-
-static int swopen;
-static Page **iolist;
-static int ioptr;
-
-static ushort ageclock;
-
-void
-swapinit(void)
-{
- swapalloc.swmap = xalloc(conf.nswap);
- swapalloc.top = &swapalloc.swmap[conf.nswap];
- swapalloc.alloc = swapalloc.swmap;
- swapalloc.last = swapalloc.swmap;
- swapalloc.free = conf.nswap;
- swapalloc.xref = 0;
-
- iolist = xalloc(conf.nswppo*sizeof(Page*));
- if(swapalloc.swmap == 0 || iolist == 0)
- panic("swapinit: not enough memory");
-
- swapimage.notext = 1;
-}
-
-static uintptr
-newswap(void)
-{
- uchar *look;
-
- lock(&swapalloc);
- if(swapalloc.free == 0) {
- unlock(&swapalloc);
- return ~0;
- }
- look = memchr(swapalloc.last, 0, swapalloc.top-swapalloc.last);
- if(look == nil)
- look = memchr(swapalloc.swmap, 0, swapalloc.last-swapalloc.swmap);
- *look = 2; /* ref for pte + io transaction */
- swapalloc.last = look;
- swapalloc.free--;
- unlock(&swapalloc);
- return (look-swapalloc.swmap) * BY2PG;
-}
-
-void
-putswap(Page *p)
-{
- uchar *idx;
-
- lock(&swapalloc);
- idx = &swapalloc.swmap[((uintptr)p)/BY2PG];
- if(*idx == 0)
- panic("putswap %#p ref == 0", p);
-
- if(*idx == 255) {
- if(swapalloc.xref == 0)
- panic("putswap %#p xref == 0", p);
-
- if(--swapalloc.xref == 0) {
- for(idx = swapalloc.swmap; idx < swapalloc.top; idx++) {
- if(*idx == 255) {
- *idx = 0;
- swapalloc.free++;
- }
- }
- }
- } else {
- if(--(*idx) == 0)
- swapalloc.free++;
- }
- unlock(&swapalloc);
-}
-
-void
-dupswap(Page *p)
-{
- uchar *idx;
-
- lock(&swapalloc);
- idx = &swapalloc.swmap[((uintptr)p)/BY2PG];
- if(*idx == 255)
- swapalloc.xref++;
- else {
- if(++(*idx) == 255)
- swapalloc.xref += 255;
- }
- unlock(&swapalloc);
-}
-
-int
-swapcount(uintptr daddr)
-{
- return swapalloc.swmap[daddr/BY2PG];
-}
-
-void
-kickpager(void)
-{
- static Ref started;
-
- if(started.ref || incref(&started) != 1)
- wakeup(&swapalloc.r);
- else
- kproc("pager", pager, 0);
-}
-
-static int
-reclaim(void)
-{
- ulong np;
-
- for(;;){
- if((np = pagereclaim(&fscache, 1000)) > 0) {
- if(0) print("reclaim: %lud fscache\n", np);
- } else if((np = pagereclaim(&swapimage, 1000)) > 0) {
- if(0) print("reclaim: %lud swap\n", np);
- } else if((np = imagereclaim(1000)) > 0) {
- if(0) print("reclaim: %lud image\n", np);
- }
- if(!needpages(nil))
- return 1; /* have pages, done */
- if(np == 0)
- return 0; /* didnt reclaim, need to swap */
- sched();
- }
-}
-
-static void
-pager(void*)
-{
- int i;
- Segment *s;
- Proc *p, *ep;
-
- p = proctab(0);
- ep = &p[conf.nproc];
-
- while(waserror())
- ;
-
- for(;;){
- up->psstate = "Reclaim";
- if(reclaim()){
- up->psstate = "Idle";
- wakeup(&palloc.pwait[0]);
- wakeup(&palloc.pwait[1]);
- sleep(&swapalloc.r, needpages, nil);
- continue;
- }
-
- if(swapimage.c == nil || swapalloc.free == 0){
- Killbig:
- if(!freebroken())
- killbig("out of memory");
- sched();
- continue;
- }
-
- i = ageclock;
- do {
- if(++p >= ep){
- if(++ageclock == i)
- goto Killbig;
- p = proctab(0);
- }
- } while(p->state == Dead || p->noswap || !canqlock(&p->seglock));
- up->psstate = "Pageout";
- for(i = 0; i < NSEG; i++) {
- if((s = p->seg[i]) != nil) {
- switch(s->type&SG_TYPE) {
- default:
- break;
- case SG_TEXT:
- pageout(p, s);
- break;
- case SG_DATA:
- case SG_BSS:
- case SG_STACK:
- case SG_SHARED:
- pageout(p, s);
- break;
- }
- }
- }
- qunlock(&p->seglock);
-
- if(ioptr > 0) {
- up->psstate = "I/O";
- executeio();
- }
- }
-}
-
-static void
-pageout(Proc *p, Segment *s)
-{
- int type, i, size;
- short age;
- Pte *l;
- Page **pg, *entry;
-
- if(!canqlock(s)) /* We cannot afford to wait, we will surely deadlock */
- return;
-
- if(!canflush(p, s)) { /* Able to invalidate all tlbs with references */
- qunlock(s);
- putseg(s);
- return;
- }
-
- if(waserror()) {
- qunlock(s);
- putseg(s);
- return;
- }
-
- /* Pass through the pte tables looking for memory pages to swap out */
- type = s->type&SG_TYPE;
- size = s->mapsize;
- for(i = 0; i < size; i++) {
- l = s->map[i];
- if(l == nil)
- continue;
- for(pg = l->first; pg <= l->last; pg++) {
- entry = *pg;
- if(pagedout(entry))
- continue;
- if(entry->modref & PG_REF) {
- entry->modref &= ~PG_REF;
- entry->refage = ageclock;
- continue;
- }
- age = (short)(ageclock - entry->refage);
- if(age < 16)
- continue;
- pagepte(type, pg);
- }
- }
- poperror();
- qunlock(s);
- putseg(s);
-}
-
-static int
-canflush(Proc *p, Segment *s)
-{
- int i;
- Proc *ep;
-
- if(incref(s) == 2) /* Easy if we are the only user */
- return canpage(p);
-
- /* Now we must do hardwork to ensure all processes which have tlb
- * entries for this segment will be flushed if we succeed in paging it out
- */
- p = proctab(0);
- ep = &p[conf.nproc];
- while(p < ep) {
- if(p->state != Dead) {
- for(i = 0; i < NSEG; i++)
- if(p->seg[i] == s)
- if(!canpage(p))
- return 0;
- }
- p++;
- }
- return 1;
-}
-
-static void
-pagepte(int type, Page **pg)
-{
- uintptr daddr;
- Page *outp;
-
- outp = *pg;
- switch(type) {
- case SG_TEXT: /* Revert to demand load */
- putpage(outp);
- *pg = nil;
- break;
-
- case SG_DATA:
- case SG_BSS:
- case SG_STACK:
- case SG_SHARED:
- if(ioptr >= conf.nswppo)
- break;
-
- /*
- * get a new swap address with swapcount 2, one for the pte
- * and one extra ref for us while we write the page to disk
- */
- daddr = newswap();
- if(daddr == ~0)
- break;
-
- /* clear any pages referring to it from the cache */
- cachedel(&swapimage, daddr);
-
- /* forget anything that it used to cache */
- uncachepage(outp);
-
- /*
- * enter it into the cache so that a fault happening
- * during the write will grab the page from the cache
- * rather than one partially written to the disk
- */
- outp->daddr = daddr;
- cachepage(outp, &swapimage);
- *pg = (Page*)(daddr|PG_ONSWAP);
-
- /* Add page to IO transaction list */
- iolist[ioptr++] = outp;
- break;
- }
-}
-
-void
-pagersummary(void)
-{
- print("%lud/%lud memory %lud/%lud swap %d iolist\n",
- palloc.user-palloc.freecount,
- palloc.user, conf.nswap-swapalloc.free, conf.nswap,
- ioptr);
-}
-
-static void
-executeio(void)
-{
- Page *outp;
- int i, n;
- Chan *c;
- char *kaddr;
- KMap *k;
-
- c = swapimage.c;
- for(i = 0; i < ioptr; i++) {
- if(ioptr > conf.nswppo)
- panic("executeio: ioptr %d > %d", ioptr, conf.nswppo);
- outp = iolist[i];
-
- assert(outp->ref > 0);
- assert(outp->image == &swapimage);
- assert(outp->daddr != ~0);
-
- /* only write when swap address still in use */
- if(swapcount(outp->daddr) > 1){
- k = kmap(outp);
- kaddr = (char*)VA(k);
-
- if(waserror())
- panic("executeio: page outp I/O error");
-
- n = devtab[c->type]->write(c, kaddr, BY2PG, outp->daddr);
- if(n != BY2PG)
- nexterror();
-
- kunmap(k);
- poperror();
- }
-
- /* drop our extra swap reference */
- putswap((Page*)outp->daddr);
-
- /* Free up the page after I/O */
- putpage(outp);
- }
- ioptr = 0;
-}
-
-int
-needpages(void*)
-{
- return palloc.freecount < swapalloc.headroom;
-}
-
-void
-setswapchan(Chan *c)
-{
- uchar dirbuf[sizeof(Dir)+100];
- Dir d;
- int n;
-
- if(waserror()){
- cclose(c);
- nexterror();
- }
- if(swapimage.c != nil) {
- if(swapalloc.free != conf.nswap)
- error(Einuse);
- cclose(swapimage.c);
- swapimage.c = nil;
- }
-
- /*
- * if this isn't a file, set the swap space
- * to be at most the size of the partition
- */
- if(devtab[c->type]->dc != L'M'){
- n = devtab[c->type]->stat(c, dirbuf, sizeof dirbuf);
- if(n <= 0 || convM2D(dirbuf, n, &d, nil) == 0)
- error("stat failed in setswapchan");
- if(d.length < conf.nswppo*BY2PG)
- error("swap device too small");
- if(d.length < conf.nswap*BY2PG){
- conf.nswap = d.length/BY2PG;
- swapalloc.top = &swapalloc.swmap[conf.nswap];
- swapalloc.free = conf.nswap;
- }
- }
- c->flag &= ~CCACHE;
- cclunk(c);
- swapimage.c = c;
- poperror();
-}
--- a/sys/src/9/ppc/blast
+++ b/sys/src/9/ppc/blast
@@ -1,6 +1,7 @@
dev
root
cons
+ swap
env
flash
pipe
--- a/sys/src/9/ppc/main.c
+++ b/sys/src/9/ppc/main.c
@@ -84,7 +84,6 @@
links();
chandevreset();
pageinit();
- swapinit();
sharedseginit();
fpsave(&initfp);
initfp.fpscr = 0;
--- a/sys/src/9/ppc/mkfile
+++ b/sys/src/9/ppc/mkfile
@@ -31,7 +31,6 @@
qlock.$O\
rdb.$O\
segment.$O\
- swap.$O\
sysfile.$O\
sysproc.$O\
taslock.$O\
--- a/sys/src/9/sgi/indy
+++ b/sys/src/9/sgi/indy
@@ -1,6 +1,7 @@
dev
root
cons
+ swap
uart
mnt
srv
--- a/sys/src/9/sgi/main.c
+++ b/sys/src/9/sgi/main.c
@@ -192,8 +192,6 @@
initseg();
links();
chandevreset();
-
- swapinit();
userinit();
schedinit();
panic("schedinit returned");
--- a/sys/src/9/sgi/mkfile
+++ b/sys/src/9/sgi/mkfile
@@ -38,7 +38,6 @@
rdb.$O\
rebootcmd.$O\
segment.$O\
- swap.$O\
syscallfmt.$O\
sysfile.$O\
sysproc.$O\
--- a/sys/src/9/teg2/main.c
+++ b/sys/src/9/teg2/main.c
@@ -455,7 +455,6 @@
// i8250console(); /* too early; see init0 */
pageinit(); /* prints "1020M memory: ⋯ */
- swapinit();
userinit();
/*
--- a/sys/src/9/teg2/mkfile
+++ b/sys/src/9/teg2/mkfile
@@ -34,7 +34,6 @@
qio.$O\
qlock.$O\
segment.$O\
- swap.$O\
syscallfmt.$O\
sysfile.$O\
sysproc.$O\
--- a/sys/src/9/teg2/ts
+++ b/sys/src/9/teg2/ts
@@ -2,6 +2,7 @@
dev
root
cons
+ swap
env
pipe
proc
--- a/sys/src/9/xen/main.c
+++ b/sys/src/9/xen/main.c
@@ -103,8 +103,6 @@
// conf.monitor = 1;
chandevreset();
pageinit();
-
- swapinit();
userinit();
schedinit();
}
--- a/sys/src/9/xen/mkfile
+++ b/sys/src/9/xen/mkfile
@@ -32,7 +32,6 @@
qlock.$O\
rebootcmd.$O\
segment.$O\
- swap.$O\
sysfile.$O\
sysproc.$O\
taslock.$O\
--- a/sys/src/9/xen/xenpcf
+++ b/sys/src/9/xen/xenpcf
@@ -1,6 +1,7 @@
dev
root netif
cons
+ swap
uart
arch
env
--- a/sys/src/9/zynq/main.c
+++ b/sys/src/9/zynq/main.c
@@ -393,7 +393,6 @@
archinit();
chandevreset();
pageinit();
- swapinit();
screeninit();
userinit();
schedinit();
--- a/sys/src/9/zynq/mkfile
+++ b/sys/src/9/zynq/mkfile
@@ -31,7 +31,6 @@
qio.$O\
qlock.$O\
segment.$O\
- swap.$O\
sysfile.$O\
sysproc.$O\
taslock.$O\
--- a/sys/src/9/zynq/zynq
+++ b/sys/src/9/zynq/zynq
@@ -1,6 +1,7 @@
dev
root
cons
+ swap
arch
uart
mnt