shithub: riscv

Download patch

ref: d4d86df2ab5fdc1d3f812fcc55fc74390a28d08f
parent: 4f95d75098dd911a0d1bd35d13fda7e5318db9d3
author: cinap_lenrek <[email protected]>
date: Sun Jun 22 11:12:45 EDT 2014

kernel: new pagecache, remove Lock from page, use cmpswap for Ref instead of Lock

make the Page structure less than half its original size by getting rid of
the Lock and the lru.

The Lock was required to coordinate the unchaining of pages that were
both cached and on the lru freelist.

now pages have a single next pointer that is used either for the palloc.head
freelist or for the page cache hash chains in Image.pghash[], never both.

cached pages are not on the freelist anymore, but will be reclaimed
from images by the pager when the freelist runs out of pages.

each Image has its own 512 hash chains for cached page lookup. That is
2MB worth of pages and there should be no collisions for most text images.

page reclaiming can be done without holding palloc.lock as the Image is
the owner of the page hash chains protected by the Image's lock.

reclaiming Image structures can be done quickly by only reclaiming pages from
inactive images, that is images which are not currently in use by segments.

the Ref structure has no Lock anymore. Only a single long that is atomically
incremented or decremented using cmpswap().

there are various other changes to the code as a consequence, and lots of pikeshedding,
sorry.

--- a/sys/src/9/pc/mmu.c
+++ b/sys/src/9/pc/mmu.c
@@ -303,7 +303,7 @@
 		proc->newtlb = 0;
 	}
 
-	if(proc->mmupdb){
+	if(proc->mmupdb != nil){
 		pdb = tmpmap(proc->mmupdb);
 		pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
 		tmpunmap(pdb);
@@ -341,11 +341,11 @@
 	if(islo())
 		panic("mmurelease: islo");
 	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
-	if(proc->kmaptable){
+	if(proc->kmaptable != nil){
 		if(proc->mmupdb == nil)
 			panic("mmurelease: no mmupdb");
-		if(--proc->kmaptable->ref)
-			panic("mmurelease: kmap ref %d", proc->kmaptable->ref);
+		if(--proc->kmaptable->ref != 0)
+			panic("mmurelease: kmap ref %ld", proc->kmaptable->ref);
 		if(proc->nkmap)
 			panic("mmurelease: nkmap %d", proc->nkmap);
 		/*
@@ -361,23 +361,23 @@
 		 * move kmaptable to free list.
 		 */
 		pagechainhead(proc->kmaptable);
-		proc->kmaptable = 0;
+		proc->kmaptable = nil;
 	}
-	if(proc->mmupdb){
+	if(proc->mmupdb != nil){
 		mmuptefree(proc);
 		mmupdbfree(proc, proc->mmupdb);
-		proc->mmupdb = 0;
+		proc->mmupdb = nil;
 	}
-	for(page = proc->mmufree; page; page = next){
+	for(page = proc->mmufree; page != nil; page = next){
 		next = page->next;
-		if(--page->ref)
-			panic("mmurelease: page->ref %d", page->ref);
+		if(--page->ref != 0)
+			panic("mmurelease: page->ref %ld", page->ref);
 		pagechainhead(page);
 	}
-	if(proc->mmufree && palloc.r.p)
+	if(proc->mmufree != nil && palloc.r.p != nil)
 		wakeup(&palloc.r);
-	proc->mmufree = 0;
-	if(proc->ldt){
+	proc->mmufree = nil;
+	if(proc->ldt != nil){
 		free(proc->ldt);
 		proc->ldt = nil;
 		proc->nldt = 0;
--- a/sys/src/9/port/cache.c
+++ b/sys/src/9/port/cache.c
@@ -55,7 +55,8 @@
 	Extent*	head;
 };
 
-static Image fscache;
+Image fscache;
+
 static Cache cache;
 static Ecache ecache;
 static int maxcache = MAXCACHE;
--- a/sys/src/9/port/chan.c
+++ b/sys/src/9/port/chan.c
@@ -96,25 +96,27 @@
 long
 incref(Ref *r)
 {
-	long x;
+	long old, new;
 
-	lock(r);
-	x = ++r->ref;
-	unlock(r);
-	return x;
+	do {
+		old = r->ref;
+		new = old+1;
+	} while(!cmpswap(&r->ref, old, new));
+	return new;
 }
 
 long
 decref(Ref *r)
 {
-	long x;
+	long old, new;
 
-	lock(r);
-	x = --r->ref;
-	unlock(r);
-	if(x < 0)
-		panic("decref pc=%#p", getcallerpc(&r));
-	return x;
+	do {
+		old = r->ref;
+		if(old <= 0)
+			panic("decref pc=%#p", getcallerpc(&r));
+		new = old-1;
+	} while(!cmpswap(&r->ref, old, new));
+	return new;
 }
 
 /*
--- a/sys/src/9/port/devaoe.c
+++ b/sys/src/9/port/devaoe.c
@@ -954,7 +954,7 @@
 			devdir(c, q, "devlink", 0, eve, 0555, dp);
 			return 1;
 		}
-		if(i >= units.ref)
+		if(i >= Maxunits || i >= units.ref)
 			return -1;
 		d = unit2dev(i);
 		if(s >= d->ndl)
@@ -1728,12 +1728,11 @@
 {
 	int x;
 
-	lock(&units);
-	if(units.ref == Maxunits)
+	x = incref(&units);
+	if(x >= Maxunits){
+		decref(&units);
 		x = -1;
-	else
-		x = units.ref++;
-	unlock(&units);
+	}
 	return x;
 }
 
@@ -1740,12 +1739,7 @@
 static int
 dropunit(void)
 {
-	int x;
-
-	lock(&units);
-	x = --units.ref;
-	unlock(&units);
-	return x;
+	return decref(&units);
 }
 
 /*
@@ -2064,9 +2058,7 @@
 static void
 newvers(Aoedev *d)
 {
-	lock(&drivevers);
-	d->vers = drivevers.ref++;
-	unlock(&drivevers);
+	d->vers = incref(&drivevers);
 }
 
 static int
--- a/sys/src/9/port/devcons.c
+++ b/sys/src/9/port/devcons.c
@@ -480,6 +480,8 @@
 	int i, k, id;
 	vlong offset = off;
 	extern char configfile[];
+	extern Image fscache;
+	extern Image swapimage;
 
 	if(n <= 0)
 		return n;
@@ -611,7 +613,7 @@
 			(uvlong)conf.npage*BY2PG,
 			(uvlong)BY2PG,
 			conf.npage-conf.upages,
-			palloc.user-palloc.freecount, palloc.user,
+			palloc.user-palloc.freecount-fscache.pgref-swapimage.pgref, palloc.user,
 			conf.nswap-swapalloc.free, conf.nswap,
 			(uvlong)mainmem->cursize,
 			(uvlong)mainmem->maxsize,
--- a/sys/src/9/port/devproc.c
+++ b/sys/src/9/port/devproc.c
@@ -260,7 +260,7 @@
 		break;
 	case Qprofile:
 		q = p->seg[TSEG];
-		if(q && q->profile) {
+		if(q != nil && q->profile != nil) {
 			len = (q->top-q->base)>>LRESPROF;
 			len *= sizeof(*q->profile);
 		}
@@ -800,7 +800,7 @@
 
 	case Qprofile:
 		s = p->seg[TSEG];
-		if(s == 0 || s->profile == 0)
+		if(s == nil || s->profile == nil)
 			error("profile is off");
 		i = (s->top-s->base)>>LRESPROF;
 		i *= sizeof(*s->profile);
@@ -904,9 +904,9 @@
 		}
 		for(i=0; i<NSEG; i++){
 			if(s = p->seg[i]){
-				eqlock(&s->lk);
+				eqlock(s);
 				l += mcountseg(s);
-				qunlock(&s->lk);
+				qunlock(s);
 			}
 		}
 		poperror();
@@ -1212,18 +1212,14 @@
 	Segment *s;
 
 	s = p->seg[TSEG];
-	if(s == 0)
+	if(s == nil)
 		error(Enonexist);
 	if(p->state==Dead)
 		error(Eprocdied);
 
-	lock(s);
 	i = s->image;
-	if(i == 0) {
-		unlock(s);
+	if(i == nil)
 		error(Eprocdied);
-	}
-	unlock(s);
 
 	lock(i);
 	if(waserror()) {
@@ -1231,8 +1227,11 @@
 		nexterror();
 	}
 
+	if(i->s != s)
+		error(Eprocdied);
+		
 	tc = i->c;
-	if(tc == 0)
+	if(tc == nil)
 		error(Eprocdied);
 
 	if(incref(tc) == 1 || (tc->flag&COPEN) == 0 || tc->mode!=OREAD) {
@@ -1292,8 +1291,8 @@
 	if(f == nil)
 		error(Eprocdied);
 
+	incref(f);
 	lock(f);
-	f->ref++;
 	while(fd <= f->maxfd){
 		c = f->fd[fd];
 		if(c != nil){
@@ -1417,11 +1416,11 @@
 		s = p->seg[TSEG];
 		if(s == 0 || (s->type&SG_TYPE) != SG_TEXT)
 			error(Ebadctl);
-		if(s->profile != 0)
+		if(s->profile != nil)
 			free(s->profile);
 		npc = (s->top-s->base)>>LRESPROF;
 		s->profile = malloc(npc*sizeof(*s->profile));
-		if(s->profile == 0)
+		if(s->profile == nil)
 			error(Enomem);
 		break;
 	case CMstart:
@@ -1632,9 +1631,9 @@
 	if(i == NSEG)
 		panic("segment gone");
 
-	qunlock(&s->lk);
+	qunlock(s);
 	putseg(s);
-	qlock(&ps->lk);
+	qlock(ps);
 	p->seg[i] = ps;
 	qunlock(&p->seglock);
 
--- a/sys/src/9/port/fault.c
+++ b/sys/src/9/port/fault.c
@@ -28,14 +28,14 @@
 	for(;;) {
 		spllo();
 
-		s = seg(up, addr, 1);		/* leaves s->lk qlocked if seg != nil */
-		if(s == 0) {
+		s = seg(up, addr, 1);		/* leaves s locked if seg != nil */
+		if(s == nil) {
 			up->psstate = sps;
 			return -1;
 		}
 
 		if(!read && (s->type&SG_RONLY)) {
-			qunlock(&s->lk);
+			qunlock(s);
 			up->psstate = sps;
 			return -1;
 		}
@@ -62,7 +62,7 @@
 {
 	char buf[ERRMAX];
 
-	if(c && c->path){
+	if(c != nil && c->path != nil){
 		snprint(buf, sizeof buf, "%s accessing %s: %s", s, c->path->s, up->errstr);
 		s = buf;
 	}
@@ -82,13 +82,13 @@
 	int type;
 	Pte **p, *etp;
 	uintptr soff, mmuphys=0;
-	Page **pg, *lkp, *new;
+	Page **pg, *old, *new;
 	Page *(*fn)(Segment*, uintptr);
 
 	addr &= ~(BY2PG-1);
 	soff = addr-s->base;
 	p = &s->map[soff/PTEMAPMEM];
-	if(*p == 0)
+	if(*p == nil)
 		*p = ptealloc();
 
 	etp = *p;
@@ -116,9 +116,9 @@
 	case SG_BSS:
 	case SG_SHARED:			/* Zero fill on demand */
 	case SG_STACK:
-		if(*pg == 0) {
+		if(*pg == nil) {
 			new = newpage(1, &s, addr);
-			if(s == 0)
+			if(s == nil)
 				return -1;
 			*pg = new;
 		}
@@ -139,23 +139,16 @@
 			break;
 		}
 
-		lkp = *pg;
-		lock(lkp);
-		if(lkp->ref == 0)
-			panic("fault %#p ref == 0", lkp);
-		if(lkp->ref == 1 && lkp->image == nil) {
-			unlock(lkp);
-		} else if(lkp->image == &swapimage && (lkp->ref + swapcount(lkp->daddr)) == 1) {
-			uncachepage(lkp);
-			unlock(lkp);
-		} else {
-			unlock(lkp);
+		old = *pg;
+		if(old->image == &swapimage && (old->ref + swapcount(old->daddr)) == 1)
+			uncachepage(old);
+		if(old->ref > 1 || old->image != nil) {
 			new = newpage(0, &s, addr);
-			if(s == 0)
+			if(s == nil)
 				return -1;
 			*pg = new;
-			copypage(lkp, *pg);
-			putpage(lkp);
+			copypage(old, *pg);
+			putpage(old);
 		}
 		mmuphys = PPN((*pg)->pa) | PTEWRITE | PTEVALID;
 		(*pg)->modref = PG_MOD|PG_REF;
@@ -162,7 +155,7 @@
 		break;
 
 	case SG_PHYSICAL:
-		if(*pg == 0) {
+		if(*pg == nil) {
 			fn = s->pseg->pgalloc;
 			if(fn)
 				*pg = (*fn)(s, addr);
@@ -181,7 +174,7 @@
 		(*pg)->modref = PG_MOD|PG_REF;
 		break;
 	}
-	qunlock(&s->lk);
+	qunlock(s);
 
 	if(doputmmu)
 		putmmu(addr, mmuphys, *pg);
@@ -202,7 +195,7 @@
 
 retry:
 	loadrec = *p;
-	if(loadrec == 0) {	/* from a text/data image */
+	if(loadrec == nil) {	/* from a text/data image */
 		daddr = s->fstart+soff;
 		new = lookpage(s->image, daddr);
 		if(new != nil) {
@@ -229,7 +222,7 @@
 		c = swapimage.c;
 		ask = BY2PG;
 	}
-	qunlock(&s->lk);
+	qunlock(s);
 
 	new = newpage(0, 0, addr);
 	k = kmap(new);
@@ -250,37 +243,42 @@
 
 	poperror();
 	kunmap(k);
-	qlock(&s->lk);
-	if(loadrec == 0) {	/* This is demand load */
+	qlock(s);
+	if(loadrec == nil) {	/* This is demand load */
 		/*
 		 *  race, another proc may have gotten here first while
-		 *  s->lk was unlocked
+		 *  s was unlocked
 		 */
-		if(*p == 0) { 
-			new->daddr = daddr;
-			cachepage(new, s->image);
-			*p = new;
+		if(*p == nil) { 
+			/*
+			 *  check page cache again after i/o to reduce double caching
+			 */
+			*p = lookpage(s->image, daddr);
+			if(*p == nil) {
+				incref(new);
+				new->daddr = daddr;
+				cachepage(new, s->image);
+				*p = new;
+			}
 		}
-		else
-			putpage(new);
 	}
 	else {			/* This is paged out */
 		/*
 		 *  race, another proc may have gotten here first
 		 *  (and the pager may have run on that page) while
-		 *  s->lk was unlocked
+		 *  s was unlocked
 		 */
-		if(*p != loadrec){
-			if(!pagedout(*p)){
+		if(*p != loadrec) {
+			if(!pagedout(*p)) {
 				/* another process did it for me */
-				putpage(new);
 				goto done;
-			} else if(*p) {
+			} else if(*p != nil) {
 				/* another process and the pager got in */
 				putpage(new);
 				goto retry;
 			} else {
 				/* another process segfreed the page */
+				incref(new);
 				k = kmap(new);
 				memset((void*)VA(k), 0, ask);
 				kunmap(k);
@@ -289,13 +287,14 @@
 			}
 		}
 
+		incref(new);
 		new->daddr = daddr;
 		cachepage(new, &swapimage);
 		*p = new;
 		putswap(loadrec);
 	}
-
 done:
+	putpage(new);
 	if(s->flushme)
 		memset((*p)->cachectl, PG_TXTFLUSH, sizeof((*p)->cachectl));
 }
@@ -311,7 +310,7 @@
 	if((long)len >= 0) {
 		for(;;) {
 			s = seg(up, addr, 0);
-			if(s == 0 || (write && (s->type&SG_RONLY)))
+			if(s == nil || (write && (s->type&SG_RONLY)))
 				break;
 
 			if(addr+len > s->top) {
@@ -369,21 +368,20 @@
 
 	et = &p->seg[NSEG];
 	for(s = p->seg; s < et; s++) {
-		n = *s;
-		if(n == 0)
+		if((n = *s) == nil)
 			continue;
 		if(addr >= n->base && addr < n->top) {
 			if(dolock == 0)
 				return n;
 
-			qlock(&n->lk);
+			qlock(n);
 			if(addr >= n->base && addr < n->top)
 				return n;
-			qunlock(&n->lk);
+			qunlock(n);
 		}
 	}
 
-	return 0;
+	return nil;
 }
 
 extern void checkmmu(uintptr, uintptr);
@@ -402,22 +400,20 @@
 
 	checked = 0;
 	for(sp=up->seg, ep=&up->seg[NSEG]; sp<ep; sp++){
-		s = *sp;
-		if(s == nil)
+		if((s = *sp) == nil)
 			continue;
-		qlock(&s->lk);
+		qlock(s);
 		for(addr=s->base; addr<s->top; addr+=BY2PG){
 			off = addr - s->base;
-			p = s->map[off/PTEMAPMEM];
-			if(p == 0)
+			if((p = s->map[off/PTEMAPMEM]) == nil)
 				continue;
 			pg = p->pages[(off&(PTEMAPMEM-1))/BY2PG];
-			if(pg == 0 || pagedout(pg))
+			if(pagedout(pg))
 				continue;
 			checkmmu(addr, pg->pa);
 			checked++;
 		}
-		qunlock(&s->lk);
+		qunlock(s);
 	}
 	print("%ld %s: checked %d page table entries\n", up->pid, up->text, checked);
 }
--- a/sys/src/9/port/page.c
+++ b/sys/src/9/port/page.c
@@ -5,10 +5,8 @@
 #include	"fns.h"
 #include	"../port/error.h"
 
-#define	pghash(daddr)	palloc.hash[(daddr>>PGSHIFT)&(PGHSIZE-1)]
+Palloc palloc;
 
-struct	Palloc palloc;
-
 void
 pageinit(void)
 {
@@ -31,24 +29,19 @@
 	}
 
 	color = 0;
-	palloc.head = palloc.pages;
-	p = palloc.head;
+	palloc.head = nil;
+	p = palloc.pages;
 	for(i=0; i<nelem(palloc.mem); i++){
 		pm = &palloc.mem[i];
 		for(j=0; j<pm->npage; j++){
 			memset(p, 0, sizeof *p);
-			p->prev = p-1;
-			p->next = p+1;
 			p->pa = pm->base+j*BY2PG;
 			p->color = color;
-			palloc.freecount++;
 			color = (color+1)%NCOLOR;
+			pagechainhead(p);
 			p++;
 		}
 	}
-	palloc.tail = p - 1;
-	palloc.head->prev = 0;
-	palloc.tail->next = 0;
 
 	palloc.user = p - palloc.pages;
 	u = palloc.user*BY2PG;
@@ -71,78 +64,100 @@
 }
 
 void
-pageunchain(Page *p)
+pagechainhead(Page *p)
 {
-	if(canlock(&palloc))
-		panic("pageunchain (palloc %p)", &palloc);
-	if(p->prev)
-		p->prev->next = p->next;
-	else
-		palloc.head = p->next;
-	if(p->next)
-		p->next->prev = p->prev;
-	else
-		palloc.tail = p->prev;
-	p->prev = p->next = nil;
-	palloc.freecount--;
+	p->next = palloc.head;
+	palloc.head = p;
+	palloc.freecount++;
 }
 
-void
-pagechaintail(Page *p)
+static void
+freepages(Page *head, Page *tail, int n)
 {
-	if(canlock(&palloc))
-		panic("pagechaintail");
-	if(palloc.tail) {
-		p->prev = palloc.tail;
-		palloc.tail->next = p;
-	}
-	else {
-		palloc.head = p;
-		p->prev = 0;
-	}
-	palloc.tail = p;
-	p->next = 0;
-	palloc.freecount++;
+	lock(&palloc);
+	tail->next = palloc.head;
+	palloc.head = head;
+	palloc.freecount += n;
+	if(palloc.r.p != nil)
+		wakeup(&palloc.r);
+	unlock(&palloc);
 }
 
-void
-pagechainhead(Page *p)
+int
+pagereclaim(Image *i, int min)
 {
-	if(canlock(&palloc))
-		panic("pagechainhead");
-	if(palloc.head) {
-		p->next = palloc.head;
-		palloc.head->prev = p;
+	Page **h, **l, *p;
+	Page *fh, *ft;
+	int n;
+
+	lock(i);
+	if(i->pgref == 0){
+		unlock(i);
+		return 0;
 	}
-	else {
-		palloc.tail = p;
-		p->next = 0;
+	incref(i);
+
+	n = 0;
+	fh = ft = nil;
+	for(h = i->pghash; h < &i->pghash[PGHSIZE]; h++){
+		if((p = *h) == nil)
+			continue;
+		for(l = h; p != nil; p = p->next){
+			if(p->ref == 0)
+				break;
+			l = &p->next;
+		}
+		if(p == nil)
+			continue;
+
+		*l = p->next;
+		p->next = nil;
+		p->image = nil;
+		p->daddr = ~0;
+		i->pgref--;
+		decref(i);
+
+		if(fh == nil)
+			fh = p;
+		else
+			ft->next = p;
+		ft = p;
+		if(++n >= min)
+			break;
 	}
-	palloc.head = p;
-	p->prev = 0;
-	palloc.freecount++;
+	unlock(i);
+	putimage(i);
+
+	if(n > 0)
+		freepages(fh, ft, n);
+
+	return n;
 }
 
+int
+ispages(void*)
+{
+	return palloc.freecount >= swapalloc.highwater;
+}
+
 Page*
 newpage(int clear, Segment **s, uintptr va)
 {
-	Page *p;
+	Page *p, **l;
 	KMap *k;
 	uchar ct;
-	int i, hw, color;
+	int i, color;
 
-	lock(&palloc);
 	color = getpgcolor(va);
-	hw = swapalloc.highwater;
+	lock(&palloc);
 	for(;;) {
-		if(palloc.freecount > hw)
+		if(palloc.freecount > swapalloc.highwater)
 			break;
 		if(up->kp && palloc.freecount > 0)
 			break;
-
 		unlock(&palloc);
-		if(s)
-			qunlock(&((*s)->lk));
+		if(s != nil)
+			qunlock(*s);
 
 		if(!waserror()){
 			eqlock(&palloc.pwait);	/* Hold memory requesters here */
@@ -164,9 +179,9 @@
 		 * a page. Fault will call newpage again when it has
 		 * reacquired the segment locks
 		 */
-		if(s){
-			*s = 0;
-			return 0;
+		if(s != nil){
+			*s = nil;
+			return nil;
 		}
 
 		lock(&palloc);
@@ -173,31 +188,31 @@
 	}
 
 	/* First try for our colour */
-	for(p = palloc.head; p; p = p->next)
+	l = &palloc.head;
+	for(p = *l; p != nil; p = p->next){
 		if(p->color == color)
 			break;
+		l = &p->next;
+	}
 
 	ct = PG_NOFLUSH;
-	if(p == 0) {
-		p = palloc.head;
+	if(p == nil) {
+		l = &palloc.head;
+		p = *l;
 		p->color = color;
 		ct = PG_NEWCOL;
 	}
 
-	pageunchain(p);
+	*l = p->next;
+	p->next = nil;
+	palloc.freecount--;
+	unlock(&palloc);
 
-	lock(p);
-	if(p->ref != 0)
-		panic("newpage: p->ref %d != 0", p->ref);
-
-	uncachepage(p);
-	p->ref++;
+	p->ref = 1;
 	p->va = va;
 	p->modref = 0;
 	for(i = 0; i < MAXMACH; i++)
 		p->cachectl[i] = ct;
-	unlock(p);
-	unlock(&palloc);
 
 	if(clear) {
 		k = kmap(p);
@@ -208,12 +223,6 @@
 	return p;
 }
 
-int
-ispages(void*)
-{
-	return palloc.freecount >= swapalloc.highwater;
-}
-
 void
 putpage(Page *p)
 {
@@ -221,29 +230,12 @@
 		putswap(p);
 		return;
 	}
-
-	lock(&palloc);
-	lock(p);
-
-	if(p->ref == 0)
-		panic("putpage");
-
-	if(--p->ref > 0) {
-		unlock(p);
-		unlock(&palloc);
+	if(p->image != nil) {
+		decref(p);
 		return;
 	}
-
-	if(p->image && p->image != &swapimage)
-		pagechaintail(p);
-	else 
-		pagechainhead(p);
-
-	if(palloc.r.p != 0)
-		wakeup(&palloc.r);
-
-	unlock(p);
-	unlock(&palloc);
+	if(decref(p) == 0)
+		freepages(p, p, 1);
 }
 
 Page*
@@ -253,19 +245,15 @@
 
 	lock(&palloc);
 	p = palloc.head;
-	if(palloc.freecount < swapalloc.highwater) {
+	if(p == nil || palloc.freecount < swapalloc.highwater) {
 		unlock(&palloc);
-		return 0;
+		return nil;
 	}
-	pageunchain(p);
-
-	lock(p);
-	if(p->ref != 0)
-		panic("auxpage");
-	p->ref++;
-	uncachepage(p);
-	unlock(p);
+	palloc.head = p->next;
+	p->next = nil;
+	palloc.freecount--;
 	unlock(&palloc);
+	p->ref = 1;
 
 	return p;
 }
@@ -283,116 +271,82 @@
 }
 
 void
-uncachepage(Page *p)			/* Always called with a locked page */
+cachepage(Page *p, Image *i)
 {
-	Page **l, *f;
+	Page **h;
+
+	lock(i);
+	p->image = i;
+	h = &PGHASH(i, p->daddr);
+	p->next = *h;
+	*h = p;
+	incref(i);
+	i->pgref++;
+	unlock(i);
+}
+
+void
+uncachepage(Page *p)
+{
+	Page **l, *x;
 	Image *i;
 
 	i = p->image;
-	if(i == 0)
+	if(i == nil)
 		return;
 
-	lock(&palloc.hashlock);
-	l = &pghash(p->daddr);
-	for(f = *l; f; f = f->hash) {
-		if(f == p) {
-			*l = p->hash;
-			break;
+	lock(i);
+	if(p->image != i){
+		unlock(i);
+		return;
+	}
+	l = &PGHASH(i, p->daddr);
+	for(x = *l; x != nil; x = x->next) {
+		if(x == p){
+			*l = p->next;
+			p->next = nil;
+			p->image = nil;
+			p->daddr = ~0;
+			i->pgref--;
+			unlock(i);
+			putimage(i);
+			return;
 		}
-		l = &f->hash;
+		l = &x->next;
 	}
-	unlock(&palloc.hashlock);
-	p->image = 0;
-	p->daddr = 0;
-
-	lock(i);
-	i->pgref--;
 	unlock(i);
-	putimage(i);
 }
 
-void
-cachepage(Page *p, Image *i)
+Page*
+lookpage(Image *i, uintptr daddr)
 {
-	Page **l;
+	Page *p;
 
-	/* If this ever happens it should be fixed by calling
-	 * uncachepage instead of panic. I think there is a race
-	 * with pio in which this can happen. Calling uncachepage is
-	 * correct - I just wanted to see if we got here.
-	 */
-	if(p->image)
-		panic("cachepage");
-
 	lock(i);
-	i->ref++;
-	i->pgref++;
+	for(p = PGHASH(i, daddr); p != nil; p = p->next) {
+		if(p->daddr == daddr) {
+			incref(p);
+			unlock(i);
+			return p;
+		}
+	}
 	unlock(i);
 
-	lock(&palloc.hashlock);
-	p->image = i;
-	l = &pghash(p->daddr);
-	p->hash = *l;
-	*l = p;
-	unlock(&palloc.hashlock);
+	return nil;
 }
 
 void
 cachedel(Image *i, uintptr daddr)
 {
-	Page *f;
+	Page *p;
 
-retry:
-	lock(&palloc.hashlock);
-	for(f = pghash(daddr); f; f = f->hash) {
-		if(f->image == i && f->daddr == daddr) {
-			unlock(&palloc.hashlock);
-
-			lock(f);
-			if(f->image != i || f->daddr != daddr) {
-				unlock(f);
-				goto retry;
-			}
-			uncachepage(f);
-			unlock(f);
-
-			return;
-		}
+	while((p = lookpage(i, daddr)) != nil){
+		uncachepage(p);
+		putpage(p);
 	}
-	unlock(&palloc.hashlock);
 }
 
-Page *
-lookpage(Image *i, uintptr daddr)
-{
-	Page *f;
 
-retry:
-	lock(&palloc.hashlock);
-	for(f = pghash(daddr); f; f = f->hash) {
-		if(f->image == i && f->daddr == daddr) {
-			unlock(&palloc.hashlock);
-
-			lock(&palloc);
-			lock(f);
-			if(f->image != i || f->daddr != daddr) {
-				unlock(f);
-				unlock(&palloc);
-				goto retry;
-			}
-			if(++f->ref == 1)
-				pageunchain(f);
-			unlock(&palloc);
-			unlock(f);
-
-			return f;
-		}
-	}
-	unlock(&palloc.hashlock);
-
-	return 0;
-}
-
 Pte*
 ptecpy(Pte *old)
 {
@@ -403,14 +357,11 @@
 	dst = &new->pages[old->first-old->pages];
 	new->first = dst;
 	for(src = old->first; src <= old->last; src++, dst++)
-		if(*src) {
+		if(*src != nil) {
 			if(onswap(*src))
 				dupswap(*src);
-			else {
-				lock(*src);
-				(*src)->ref++;
-				unlock(*src);
-			}
+			else
+				incref(*src);
 			new->last = dst;
 			*dst = *src;
 		}
@@ -432,39 +383,35 @@
 void
 freepte(Segment *s, Pte *p)
 {
-	int ref;
 	void (*fn)(Page*);
-	Page *pt, **pg, **ptop;
+	Page **pg, **ptop;
 
 	switch(s->type&SG_TYPE) {
 	case SG_PHYSICAL:
 		fn = s->pseg->pgfree;
 		ptop = &p->pages[PTEPERTAB];
-		if(fn) {
+		if(fn != nil) {
 			for(pg = p->pages; pg < ptop; pg++) {
-				if(*pg == 0)
+				if(*pg == nil)
 					continue;
 				(*fn)(*pg);
-				*pg = 0;
+				*pg = nil;
 			}
 			break;
 		}
 		for(pg = p->pages; pg < ptop; pg++) {
-			pt = *pg;
-			if(pt == 0)
-				continue;
-			lock(pt);
-			ref = --pt->ref;
-			unlock(pt);
-			if(ref == 0)
-				free(pt);
+			if(*pg != nil) {
+				if(decref(*pg) == 0)
+					free(*pg);
+				*pg = nil;
+			}
 		}
 		break;
 	default:
 		for(pg = p->first; pg <= p->last; pg++)
-			if(*pg) {
+			if(*pg != nil) {
 				putpage(*pg);
-				*pg = 0;
+				*pg = nil;
 			}
 	}
 	free(p);
@@ -503,7 +450,7 @@
 	nwrong = 0;
 	for(i=0; i<np; i++){
 		if(palloc.pages[i].ref != ref[i]){
-			iprint("page %#p ref %d actual %lud\n", 
+			iprint("page %#p ref %ld actual %lud\n", 
 				palloc.pages[i].pa, palloc.pages[i].ref, ref[i]);
 			ref[i] = 1;
 			nwrong++;
--- a/sys/src/9/port/pgrp.c
+++ b/sys/src/9/port/pgrp.c
@@ -148,10 +148,8 @@
 	/*
 	 * Allocate mount ids in the same sequence as the parent group
 	 */
-	lock(&mountid);
 	for(m = order; m; m = m->order)
-		m->copy->mountid = mountid.ref++;
-	unlock(&mountid);
+		m->copy->mountid = incref(&mountid);
 	wunlock(&from->ns);
 }
 
--- a/sys/src/9/port/portdat.h
+++ b/sys/src/9/port/portdat.h
@@ -60,7 +60,6 @@
 
 struct Ref
 {
-	Lock;
 	long	ref;
 };
 
@@ -151,7 +150,8 @@
 
 struct Chan
 {
-	Ref;				/* the Lock in this Ref is also Chan's lock */
+	Ref;
+	Lock;
 	Chan*	next;			/* allocation */
 	Chan*	link;
 	vlong	offset;			/* in fd */
@@ -311,19 +311,16 @@
 
 struct Page
 {
-	Lock;
+	Ref;
+	Page	*next;			/* Free list or Hash chains */
 	uintptr	pa;			/* Physical address in memory */
 	uintptr	va;			/* Virtual address for user */
 	uintptr	daddr;			/* Disc address on swap */
-	ulong	gen;			/* Generation counter for swap */
-	ushort	ref;			/* Reference count */
+	Image	*image;			/* Associated text or swap image */
+	ushort	refage;			/* Swap reference age */
 	char	modref;			/* Simulated modify/reference bits */
 	char	color;			/* Cache coloring */
 	char	cachectl[MAXMACH];	/* Cache flushing control for putmmu */
-	Image	*image;			/* Associated text or swap image */
-	Page	*next;			/* Lru free list */
-	Page	*prev;
-	Page	*hash;			/* Image hash chains */
 };
 
 struct Swapalloc
@@ -340,20 +337,6 @@
 	ulong	xref;			/* Ref count for all map refs >= 255 */
 }swapalloc;
 
-struct Image
-{
-	Ref;
-	long	pgref;			/* number of cached pages (pgref <= ref) */
-	Chan	*c;			/* channel to text file, nil when not used */
-	Qid 	qid;			/* Qid for page cache coherence */
-	ulong	dev;			/* Device id of owning channel */
-	ushort	type;			/* Device type of owning channel */
-	Segment *s;			/* TEXT segment for image if running */
-	Image	*hash;			/* Qid hash chains */
-	Image	*next;			/* Free list */
-	char	notext;			/* no file associated */
-};
-
 struct Pte
 {
 	Page	*pages[PTEPERTAB];	/* Page map for this chunk of pte */
@@ -405,7 +388,7 @@
 struct Segment
 {
 	Ref;
-	QLock	lk;
+	QLock;
 	ushort	steal;		/* Page stealer lock */
 	ushort	type;		/* segment type */
 	uintptr	base;		/* virtual base */
@@ -436,10 +419,29 @@
 };
 #define REND(p,s)	((p)->rendhash[(s)&((1<<RENDLOG)-1)])
 #define MOUNTH(p,qid)	((p)->mnthash[(qid).path&((1<<MNTLOG)-1)])
+#define PGHASH(i,daddr)	((i)->pghash[((daddr)>>PGSHIFT)&(PGHSIZE-1)])
 
+struct Image
+{
+	Ref;
+	Lock;
+	Chan	*c;			/* channel to text file, nil when not used */
+	Qid 	qid;			/* Qid for page cache coherence */
+	ulong	dev;			/* Device id of owning channel */
+	ushort	type;			/* Device type of owning channel */
+	char	notext;			/* no file associated */
+	Segment *s;			/* TEXT segment for image if running */
+	Image	*hash;			/* Qid hash chains */
+	Image	*next;			/* Free list */
+	long	pgref;			/* number of cached pages (pgref <= ref) */
+	Page	*pghash[PGHSIZE];	/* page cache */
+};
+
+
 struct Pgrp
 {
-	Ref;				/* also used as a lock when mounting */
+	Ref;
+	Lock;
 	int	noattach;
 	ulong	pgrpid;
 	QLock	debug;			/* single access via devproc.c */
@@ -449,7 +451,8 @@
 
 struct Rgrp
 {
-	Ref;				/* the Ref's lock is also the Rgrp's lock */
+	Ref;
+	Lock;
 	Proc	*rendhash[RENDHASH];	/* Rendezvous tag hash */
 };
 
@@ -476,6 +479,7 @@
 struct Fgrp
 {
 	Ref;
+	Lock;
 	Chan	**fd;
 	int	nfd;			/* number allocated */
 	int	maxfd;			/* highest fd in use */
@@ -497,13 +501,10 @@
 {
 	Lock;
 	Pallocmem	mem[4];
-	Page	*head;			/* most recently used */
-	Page	*tail;			/* least recently used */
+	Page	*head;			/* freelist head */
 	ulong	freecount;		/* how many pages on free list now */
 	Page	*pages;			/* array of all pages */
 	ulong	user;			/* how many user pages */
-	Page	*hash[PGHSIZE];
-	Lock	hashlock;
 	Rendez	r;			/* Sleep for free mem */
 	QLock	pwait;			/* Queue of procs waiting for memory */
 };
@@ -772,6 +773,7 @@
 extern	Queue*	serialoq;
 extern	char*	statename[];
 extern	Image	swapimage;
+extern	Image	fscache;
 extern	char*	sysname;
 extern	uint	qiomaxatomic;
 extern	char*	sysctab[];
--- a/sys/src/9/port/portfns.h
+++ b/sys/src/9/port/portfns.h
@@ -202,7 +202,6 @@
 int		openmode(ulong);
 Block*		packblock(Block*);
 Block*		padblock(Block*, int);
-void		pageunchain(Page*);
 void		pagechainhead(Page*);
 void		pageinit(void);
 ulong	pagenumber(Page*);
--- a/sys/src/9/port/proc.c
+++ b/sys/src/9/port/proc.c
@@ -1350,13 +1350,12 @@
 		return;
 
 	/*
-	 *  wait for all processors to take a clock interrupt
+	 *  wait for all other processors to take a clock interrupt
 	 *  and flush their mmu's
 	 */
 	for(nm = 0; nm < conf.nmach; nm++)
-		if(MACHP(nm) != m)
-			while(MACHP(nm)->flushmmu)
-				sched();
+		while(m->machno != nm && MACHP(nm)->flushmmu)
+			sched();
 }
 
 void
@@ -1514,10 +1513,10 @@
 		l = 0;
 		for(i=1; i<NSEG; i++) {
 			s = p->seg[i];
-			if(s == 0 || !canqlock(&s->lk))
+			if(s == nil || !canqlock(s))
 				continue;
 			l += (ulong)mcountseg(s);
-			qunlock(&s->lk);
+			qunlock(s);
 		}
 		qunlock(&p->seglock);
 		if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) {
@@ -1537,9 +1536,9 @@
 	kp->procctl = Proc_exitbig;
 	for(i = 0; i < NSEG; i++) {
 		s = kp->seg[i];
-		if(s != 0 && canqlock(&s->lk)) {
+		if(s != nil && canqlock(s)) {
 			mfreeseg(s, s->base, (s->top - s->base)/BY2PG);
-			qunlock(&s->lk);
+			qunlock(s);
 		}
 	}
 	qunlock(&kp->seglock);
--- a/sys/src/9/port/segment.c
+++ b/sys/src/9/port/segment.c
@@ -5,10 +5,8 @@
 #include	"fns.h"
 #include	"../port/error.h"
 
-static void	imagereclaim(void);
+int imagereclaim(int);
 
-#include "io.h"
-
 /*
  * Attachable segment types
  */
@@ -25,6 +23,7 @@
 static struct Imagealloc
 {
 	Lock;
+	Image	*list;
 	Image	*free;
 	Image	*hash[IHASHSIZE];
 	QLock	ireclaim;	/* mutex on reclaiming free images */
@@ -37,13 +36,14 @@
 {
 	Image *i, *ie;
 
-	imagealloc.free = xalloc(conf.nimage*sizeof(Image));
-	if(imagealloc.free == nil)
+	imagealloc.list = xalloc(conf.nimage*sizeof(Image));
+	if(imagealloc.list == nil)
 		panic("initseg: no memory for Image");
-	ie = &imagealloc.free[conf.nimage-1];
-	for(i = imagealloc.free; i < ie; i++)
+	ie = &imagealloc.list[conf.nimage-1];
+	for(i = imagealloc.list; i < ie; i++)
 		i->next = i+1;
-	i->next = 0;
+	i->next = nil;
+	imagealloc.free = imagealloc.list;
 }
 
 Segment *
@@ -55,7 +55,9 @@
 	if(size > (SEGMAPSIZE*PTEPERTAB))
 		error(Enovmem);
 
-	s = smalloc(sizeof(Segment));
+	s = malloc(sizeof(Segment));
+	if(s == nil)
+		error(Enomem);
 	s->ref = 1;
 	s->type = type;
 	s->base = base;
@@ -66,7 +68,11 @@
 
 	mapsize = ROUND(size, PTEPERTAB)/PTEPERTAB;
 	if(mapsize > nelem(s->ssegmap)){
-		s->map = smalloc(mapsize*sizeof(Pte*));
+		s->map = malloc(mapsize*sizeof(Pte*));
+		if(s->map == nil){
+			free(s);
+			error(Enomem);
+		}
 		s->mapsize = mapsize;
 	}
 	else{
@@ -83,41 +89,33 @@
 	Pte **pp, **emap;
 	Image *i;
 
-	if(s == 0)
+	if(s == nil)
 		return;
 
 	i = s->image;
-	if(i != 0) {
+	if(i != nil) {
 		lock(i);
-		lock(s);
-		if(i->s == s && s->ref == 1)
-			i->s = 0;
+		if(decref(s) != 0){
+			unlock(i);
+			return;
+		}
+		if(i->s == s)
+			i->s = nil;
 		unlock(i);
-	}
-	else
-		lock(s);
-
-	s->ref--;
-	if(s->ref != 0) {
-		unlock(s);
+		putimage(i);
+	} else if(decref(s) != 0)
 		return;
-	}
-	unlock(s);
 
-	qlock(&s->lk);
-	if(i)
-		putimage(i);
-
 	emap = &s->map[s->mapsize];
 	for(pp = s->map; pp < emap; pp++)
-		if(*pp)
+		if(*pp != nil)
 			freepte(s, *pp);
 
-	qunlock(&s->lk);
 	if(s->map != s->ssegmap)
 		free(s->map);
-	if(s->profile != 0)
+	if(s->profile != nil)
 		free(s->profile);
+
 	free(s);
 }
 
@@ -129,11 +127,10 @@
 
 	endpte = &s->map[s->mapsize];
 	for(p = s->map; p < endpte; p++) {
-		if(*p == 0)
+		if((pte = *p) == nil)
 			continue;
-		pte = *p;
 		for(pg = pte->first; pg <= pte->last; pg++) {
-			if(x = *pg)
+			if((x = *pg) != nil)
 				x->va += offset;
 		}
 	}
@@ -149,9 +146,9 @@
 	SET(n);
 	s = seg[segno];
 
-	qlock(&s->lk);
+	qlock(s);
 	if(waserror()){
-		qunlock(&s->lk);
+		qunlock(s);
 		nexterror();
 	}
 	switch(s->type&SG_TYPE) {
@@ -174,7 +171,7 @@
 		if(segno == TSEG){
 			n = data2txt(s);
 			poperror();
-			qunlock(&s->lk);
+			qunlock(s);
 			return n;
 		}
 
@@ -190,7 +187,7 @@
 	}
 	size = s->mapsize;
 	for(i = 0; i < size; i++)
-		if(pte = s->map[i])
+		if((pte = s->map[i]) != nil)
 			n->map[i] = ptecpy(pte);
 
 	n->flushme = s->flushme;
@@ -197,13 +194,13 @@
 	if(s->ref > 1)
 		procflushseg(s);
 	poperror();
-	qunlock(&s->lk);
+	qunlock(s);
 	return n;
 
 sameseg:
 	incref(s);
 	poperror();
-	qunlock(&s->lk);
+	qunlock(s);
 	return s;
 }
 
@@ -219,7 +216,7 @@
 
 	off = p->va - s->base;
 	pte = &s->map[off/PTEMAPMEM];
-	if(*pte == 0)
+	if(*pte == nil)
 		*pte = ptealloc();
 
 	pg = &(*pte)->pages[(off&(PTEMAPMEM-1))/BY2PG];
@@ -250,14 +247,11 @@
 		}
 	}
 
-	/*
-	 * imagereclaim dumps pages from the free list which are cached by image
-	 * structures. This should free some image structures.
-	 */
-	while(!(i = imagealloc.free)) {
+	/* dump pages of inactive images to free image structures */
+	while((i = imagealloc.free) == nil) {
 		unlock(&imagealloc);
-		imagereclaim();
-		if(!imagealloc.free){
+		imagereclaim(1000);
+		if(imagealloc.free == nil){
 			freebroken();		/* can use the memory */
 			resrcwait("no image after reclaim");
 		}
@@ -276,15 +270,15 @@
 	*l = i;
 
 found:
+	unlock(&imagealloc);
 	if(i->c == nil){
 		i->c = c;
 		c->flag &= ~CCACHE;
 		incref(c);
 	}
-	unlock(&imagealloc);
 
-	if(i->s == 0) {
-		i->ref++;
+	if(i->s == nil) {
+		incref(i);
 		if(waserror()) {
 			unlock(i);
 			putimage(i);
@@ -300,55 +294,38 @@
 	return i;
 }
 
-static struct {
-	int	calls;			/* times imagereclaim was called */
-	int	loops;			/* times the main loop was run */
-	uvlong	ticks;			/* total time in the main loop */
-	uvlong	maxt;			/* longest time in main loop */
-} irstats;
+extern int pagereclaim(Image*, int);	/* page.c */
 
-static void
-imagereclaim(void)
+int
+imagereclaim(int min)
 {
-	int n;
-	Page *p, *x;
-	uvlong ticks;
+	static Image *i, *ie;
+	int j, n;
 
-	irstats.calls++;
-	/* Somebody is already cleaning the page cache */
-	if(!canqlock(&imagealloc.ireclaim))
-		return;
-
-	lock(&palloc);
-	ticks = fastticks(nil);
+	eqlock(&imagealloc.ireclaim);
+	if(i == nil){
+		i = imagealloc.list;
+		ie = &imagealloc.list[conf.nimage];
+	}
 	n = 0;
-	/*
-	 * All the pages with images backing them are at the
-	 * end of the list (see putpage) so start there and work
-	 * backward.
-	 */
-	for(p = palloc.tail; p && p->image && (n<1000 || !imagealloc.free); p = x) {
-		x = p->prev;
-		if(p->ref == 0 && canlock(p)) {
-			if(p->ref == 0 && p->image && !p->image->notext) {
-				n++;
-				uncachepage(p);
-
-				/* move to head to maintain the invariant above */
-				pageunchain(p);
-				pagechainhead(p);
-			}
-			unlock(p);
+	for(j = 0; j < conf.nimage; j++, i++){
+		if(i >= ie)
+			i = imagealloc.list;
+		if(i->ref == 0)
+			continue;
+		/*
+		 * if there are no free image structures, only
+		 * reclaim pages from inactive images.
+		 */
+		if(imagealloc.free != nil || i->ref == i->pgref){
+			n += pagereclaim(i, min - n);
+			if(n >= min)
+				break;
 		}
 	}
-	ticks = fastticks(nil) - ticks;
-	unlock(&palloc);
-	irstats.loops++;
-	irstats.ticks += ticks;
-	if(ticks > irstats.maxt)
-		irstats.maxt = ticks;
-	//print("T%llud+", ticks);
 	qunlock(&imagealloc.ireclaim);
+
+	return n;
 }
 
 void
@@ -356,28 +333,31 @@
 {
 	Image *f, **l;
 	Chan *c;
+	int r;
 
-	if(i->notext)
+	if(i->notext){
+		decref(i);
 		return;
+	}
 
 	c = nil;
 	lock(i);
-	if(--i->ref == i->pgref){
+	r = decref(i);
+	if(r == i->pgref){
 		/*
 		 * all remaining references to this image are from the
-		 * page cache now. close the channel as we can reattach
-		 * the chan on attachimage()
+		 * page cache, so close the chan.
 		 */
 		c = i->c;
 		i->c = nil;
 	}
-	if(i->ref == 0){
+	if(r == 0){
 		l = &ihash(i->qid.path);
 		mkqid(&i->qid, ~0, ~0, QTFILE);
 		unlock(i);
 
 		lock(&imagealloc);
-		for(f = *l; f; f = f->hash) {
+		for(f = *l; f != nil; f = f->hash) {
 			if(f == i) {
 				*l = i->hash;
 				break;
@@ -389,7 +369,7 @@
 		unlock(&imagealloc);
 	} else
 		unlock(i);
-	if(c)
+	if(c != nil)
 		ccloseq(c);	/* does not block */
 }
 
@@ -403,18 +383,18 @@
 	Pte **map;
 
 	s = up->seg[seg];
-	if(s == 0)
+	if(s == nil)
 		error(Ebadarg);
 
 	if(addr == 0)
 		return s->base;
 
-	qlock(&s->lk);
+	qlock(s);
 
 	/* We may start with the bss overlapping the data */
 	if(addr < s->base) {
-		if(seg != BSEG || up->seg[DSEG] == 0 || addr < up->seg[DSEG]->base) {
-			qunlock(&s->lk);
+		if(seg != BSEG || up->seg[DSEG] == nil || addr < up->seg[DSEG]->base) {
+			qunlock(s);
 			error(Enovmem);
 		}
 		addr = s->base;
@@ -429,13 +409,13 @@
 		 * already by another proc and is past the validaddr stage.
 		 */
 		if(s->ref > 1){
-			qunlock(&s->lk);
+			qunlock(s);
 			error(Einuse);
 		}
 		mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
 		s->top = newtop;
 		s->size = newsize;
-		qunlock(&s->lk);
+		qunlock(s);
 		flushmmu();
 		return 0;
 	}
@@ -442,16 +422,16 @@
 
 	for(i = 0; i < NSEG; i++) {
 		ns = up->seg[i];
-		if(ns == 0 || ns == s)
+		if(ns == nil || ns == s)
 			continue;
 		if(newtop >= ns->base && newtop < ns->top) {
-			qunlock(&s->lk);
+			qunlock(s);
 			error(Esoverlap);
 		}
 	}
 
 	if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
-		qunlock(&s->lk);
+		qunlock(s);
 		error(Enovmem);
 	}
 	mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
@@ -466,33 +446,34 @@
 
 	s->top = newtop;
 	s->size = newsize;
-	qunlock(&s->lk);
+	qunlock(s);
 	return 0;
 }
 
 /*
- *  called with s->lk locked
+ *  called with s locked
  */
 int
 mcountseg(Segment *s)
 {
 	int i, j, pages;
-	Page **map;
+	Page *pg;
 
 	pages = 0;
 	for(i = 0; i < s->mapsize; i++){
-		if(s->map[i] == 0)
+		if(s->map[i] == nil)
 			continue;
-		map = s->map[i]->pages;
-		for(j = 0; j < PTEPERTAB; j++)
-			if(map[j])
+		for(j = 0; j < PTEPERTAB; j++){
+			pg = s->map[i]->pages[j];
+			if(!pagedout(pg))
 				pages++;
+		}
 	}
 	return pages;
 }
 
 /*
- *  called with s->lk locked
+ *  called with s locked
  */
 void
 mfreeseg(Segment *s, uintptr start, int pages)
@@ -500,17 +481,23 @@
 	int i, j, size;
 	uintptr soff;
 	Page *pg;
-	Page *list;
 
+	/*
+	 * We want to zero s->map[i]->pages[j] and putpage(pg),
+	 * but we have to make sure other processors flush the
+	 * entry from their TLBs before the page is freed.
+	 */
+	if(s->ref > 1)
+		procflushseg(s);
+
 	soff = start-s->base;
 	j = (soff&(PTEMAPMEM-1))/BY2PG;
 
 	size = s->mapsize;
-	list = nil;
 	for(i = soff/PTEMAPMEM; i < size; i++) {
 		if(pages <= 0)
-			break;
-		if(s->map[i] == 0) {
+			return;
+		if(s->map[i] == nil) {
 			pages -= PTEPERTAB-j;
 			j = 0;
 			continue;
@@ -517,42 +504,16 @@
 		}
 		while(j < PTEPERTAB) {
 			pg = s->map[i]->pages[j];
-			/*
-			 * We want to zero s->map[i]->page[j] and putpage(pg),
-			 * but we have to make sure other processors flush the
-			 * entry from their TLBs before the page is freed.
-			 * We construct a list of the pages to be freed, zero
-			 * the entries, then (below) call procflushseg, and call
-			 * putpage on the whole list.
-			 *
-			 * Swapped-out pages don't appear in TLBs, so it's okay
-			 * to putswap those pages before procflushseg.
-			 */
-			if(pg){
-				if(onswap(pg))
-					putswap(pg);
-				else{
-					pg->next = list;
-					list = pg;
-				}
-				s->map[i]->pages[j] = 0;
+			if(pg != nil){
+				s->map[i]->pages[j] = nil;
+				putpage(pg);
 			}
 			if(--pages == 0)
-				goto out;
+				return;
 			j++;
 		}
 		j = 0;
 	}
-out:
-	/* flush this seg in all other processes */
-	if(s->ref > 1)
-		procflushseg(s);
-
-	/* free the pages */
-	for(pg = list; pg != nil; pg = list){
-		list = list->next;
-		putpage(pg);
-	}
 }
 
 Segment*
@@ -565,7 +526,7 @@
 	newtop = va+len;
 	for(i = 0; i < NSEG; i++) {
 		ns = p->seg[i];
-		if(ns == 0)
+		if(ns == nil)
 			continue;
 		if((newtop > ns->base && newtop <= ns->top) ||
 		   (va >= ns->base && va < ns->top))
@@ -594,7 +555,6 @@
 		unlock(&physseglock);
 		return -1;
 	}
-
 	*ps = *new;
 	unlock(&physseglock);
 
@@ -700,13 +660,13 @@
 void
 pteflush(Pte *pte, int s, int e)
 {
+	Page *pg;
 	int i;
-	Page *p;
 
 	for(i = s; i < e; i++) {
-		p = pte->pages[i];
-		if(pagedout(p) == 0)
-			memset(p->cachectl, PG_TXTFLUSH, sizeof(p->cachectl));
+		pg = pte->pages[i];
+		if(!pagedout(pg))
+			memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl));
 	}
 }
 
@@ -741,7 +701,7 @@
 			pe = PGROUND(pe);
 		}
 		if(pe == ps) {
-			qunlock(&s->lk);
+			qunlock(s);
 			error(Ebadarg);
 		}
 
@@ -755,7 +715,7 @@
 		if(len > 0 && addr < s->top)
 			goto more;
 
-		qunlock(&s->lk);
+		qunlock(s);
 	}
 	flushmmu();
 	return 0;
@@ -767,7 +727,7 @@
 	Segment *s;
 
 	s = up->seg[TSEG];
-	if(s == 0 || s->profile == 0)
+	if(s == nil || s->profile == nil)
 		return;
 
 	s->profile[0] += TK2MS(1);
--- a/sys/src/9/port/swap.c
+++ b/sys/src/9/port/swap.c
@@ -18,20 +18,8 @@
 static	Page	**iolist;
 static	int	ioptr;
 
-static	ulong	genage, genclock, gencount;
-static	uvlong	gensum;
+static	ushort	ageclock;
 
-static void
-gentick(void)
-{
-	genclock++;
-	if(gencount)
-		genage = gensum / gencount;
-	else
-		genage = 0;
-	gensum = gencount = 0;
-}
-
 void
 swapinit(void)
 {
@@ -59,13 +47,10 @@
 		unlock(&swapalloc);
 		return ~0;
 	}
-
 	look = memchr(swapalloc.last, 0, swapalloc.top-swapalloc.last);
-	if(look == 0)
-		panic("inconsistent swap");
-
+	if(look == nil)
+		look = memchr(swapalloc.swmap, 0, swapalloc.last-swapalloc.swmap);
 	*look = 2;	/* ref for pte + io transaction */
-
 	swapalloc.last = look;
 	swapalloc.free--;
 	unlock(&swapalloc);
@@ -91,17 +76,12 @@
 				if(*idx == 255) {
 					*idx = 0;
 					swapalloc.free++;
-					if(idx < swapalloc.last)
-						swapalloc.last = idx;
 				}
 			}
 		}
 	} else {
-		if(--(*idx) == 0) {
+		if(--(*idx) == 0)
 			swapalloc.free++;
-			if(idx < swapalloc.last)
-				swapalloc.last = idx;
-		}
 	}
 	unlock(&swapalloc);
 }
@@ -131,18 +111,40 @@
 void
 kickpager(void)
 {
-	static int started;
+	static Ref started;
 
-	if(started)
+	if(started.ref || incref(&started) != 1)
 		wakeup(&swapalloc.r);
-	else {
+	else
 		kproc("pager", pager, 0);
-		started = 1;
+}
+
+extern int pagereclaim(Image*,int);	/* page.c */
+extern int imagereclaim(int);		/* segment.c */
+
+static int
+reclaim(void)
+{
+	int n;
+
+	for(;;){
+		if((n = pagereclaim(&fscache, 1000)) > 0) {
+			if(0) print("reclaim: %d fscache\n", n);
+		} else if((n = pagereclaim(&swapimage, 1000)) > 0) {
+			if(0) print("reclaim: %d swap\n", n);
+		} else if((n = imagereclaim(1000)) > 0) {
+			if(0) print("reclaim: %d image\n", n);
+		}
+		if(!needpages(nil))
+			return 1;	/* have pages, done */
+		if(n == 0)
+			return 0;	/* didn't reclaim, need to swap */
+		sched();
 	}
 }
 
 static void
-pager(void *junk)
+pager(void*)
 {
 	int i;
 	Segment *s;
@@ -153,60 +155,60 @@
 
 	while(waserror())
 		;
-loop:
-	up->psstate = "Idle";
-	wakeup(&palloc.r);
-	sleep(&swapalloc.r, needpages, 0);
 
-	while(needpages(junk)) {
-		if(swapimage.c && swapalloc.free) {
-			p++;
-			if(p >= ep){
-				p = proctab(0);
-				gentick();			
-			}
+	for(;;){
+		up->psstate = "Reclaim";
+		if(reclaim()){
+			up->psstate = "Idle";
+			wakeup(&palloc.r);
+			sleep(&swapalloc.r, needpages, nil);
+			continue;
+		}
 
-			if(p->state == Dead || p->noswap)
-				continue;
+		if(swapimage.c == nil || swapalloc.free == 0){
+			killbig("out of memory");
+			freebroken();		/* can use the memory */
+			sched();
+			continue;
+		}
 
-			if(!canqlock(&p->seglock))
-				continue;		/* process changing its segments */
+		p++;
+		if(p >= ep){
+			p = proctab(0);
+			ageclock++;
+		}
 
-			for(i = 0; i < NSEG; i++) {
-				if(!needpages(junk)){
-					qunlock(&p->seglock);
-					goto loop;
-				}
+		if(p->state == Dead || p->noswap)
+			continue;
 
-				if(s = p->seg[i]) {
-					switch(s->type&SG_TYPE) {
-					default:
-						break;
-					case SG_TEXT:
-						pageout(p, s);
-						break;
-					case SG_DATA:
-					case SG_BSS:
-					case SG_STACK:
-					case SG_SHARED:
-						up->psstate = "Pageout";
-						pageout(p, s);
-						if(ioptr != 0) {
-							up->psstate = "I/O";
-							executeio();
-						}
-						break;
-					}
+		if(!canqlock(&p->seglock))
+			continue;		/* process changing its segments */
+
+		up->psstate = "Pageout";
+		for(i = 0; i < NSEG; i++) {
+			if((s = p->seg[i]) != nil) {
+				switch(s->type&SG_TYPE) {
+				default:
+					break;
+				case SG_TEXT:
+					pageout(p, s);
+					break;
+				case SG_DATA:
+				case SG_BSS:
+				case SG_STACK:
+				case SG_SHARED:
+					pageout(p, s);
+					break;
 				}
 			}
-			qunlock(&p->seglock);
-		} else {
-			killbig("out of memory");
-			freebroken();		/* can use the memory */
-			sched();
 		}
+		qunlock(&p->seglock);
+
+		if(ioptr > 0) {
+			up->psstate = "I/O";
+			executeio();
+		}
 	}
-	goto loop;
 }
 
 static void
@@ -213,26 +215,26 @@
 pageout(Proc *p, Segment *s)
 {
 	int type, i, size;
-	ulong age;
+	short age;
 	Pte *l;
 	Page **pg, *entry;
 
-	if(!canqlock(&s->lk))	/* We cannot afford to wait, we will surely deadlock */
+	if(!canqlock(s))	/* We cannot afford to wait, we will surely deadlock */
 		return;
 
 	if(s->steal) {		/* Protected by /dev/proc */
-		qunlock(&s->lk);
+		qunlock(s);
 		return;
 	}
 
 	if(!canflush(p, s)) {	/* Able to invalidate all tlbs with references */
-		qunlock(&s->lk);
+		qunlock(s);
 		putseg(s);
 		return;
 	}
 
 	if(waserror()) {
-		qunlock(&s->lk);
+		qunlock(s);
 		putseg(s);
 		return;
 	}
@@ -248,30 +250,19 @@
 			entry = *pg;
 			if(pagedout(entry))
 				continue;
-
 			if(entry->modref & PG_REF) {
 				entry->modref &= ~PG_REF;
-				entry->gen = genclock;
+				entry->refage = ageclock;
+				continue;
 			}
-
-			if(genclock < entry->gen)
-				age = ~(entry->gen - genclock);
-			else
-				age = genclock - entry->gen;
-			gensum += age;
-			gencount++;
-			if(age <= genage)
+			age = (short)(ageclock - entry->refage);
+			if(age < 16)
 				continue;
-
 			pagepte(type, pg);
-
-			if(ioptr >= conf.nswppo)
-				goto out;
 		}
 	}
-out:
 	poperror();
-	qunlock(&s->lk);
+	qunlock(s);
 	putseg(s);
 }
 
@@ -281,14 +272,8 @@
 	int i;
 	Proc *ep;
 
-	lock(s);
-	if(s->ref == 1) {		/* Easy if we are the only user */
-		s->ref++;
-		unlock(s);
+	if(incref(s) == 2)		/* Easy if we are the only user */
 		return canpage(p);
-	}
-	s->ref++;
-	unlock(s);
 
 	/* Now we must do hardwork to ensure all processes which have tlb
 	 * entries for this segment will be flushed if we succeed in paging it out
@@ -317,7 +302,7 @@
 	switch(type) {
 	case SG_TEXT:				/* Revert to demand load */
 		putpage(outp);
-		*pg = 0;
+		*pg = nil;
 		break;
 
 	case SG_DATA:
@@ -324,6 +309,9 @@
 	case SG_BSS:
 	case SG_STACK:
 	case SG_SHARED:
+		if(ioptr >= conf.nswppo)
+			break;
+
 		/*
 		 *  get a new swap address with swapcount 2, one for the pte
 		 *  and one extra ref for us while we write the page to disk
@@ -335,8 +323,6 @@
 		/* clear any pages referring to it from the cache */
 		cachedel(&swapimage, daddr);
 
-		lock(outp);
-
 		/* forget anything that it used to cache */
 		uncachepage(outp);
 
@@ -348,7 +334,6 @@
 		outp->daddr = daddr;
 		cachepage(outp, &swapimage);
 		*pg = (Page*)(daddr|PG_ONSWAP);
-		unlock(outp);
 
 		/* Add page to IO transaction list */
 		iolist[ioptr++] = outp;
@@ -365,23 +350,10 @@
 		ioptr);
 }
 
-static int
-pageiocomp(void *a, void *b)
-{
-	Page *p1, *p2;
-
-	p1 = *(Page **)a;
-	p2 = *(Page **)b;
-	if(p1->daddr > p2->daddr)
-		return 1;
-	else
-		return -1;
-}
-
 static void
 executeio(void)
 {
-	Page *out;
+	Page *outp;
 	int i, n;
 	Chan *c;
 	char *kaddr;
@@ -388,21 +360,24 @@
 	KMap *k;
 
 	c = swapimage.c;
-	qsort(iolist, ioptr, sizeof iolist[0], pageiocomp);
 	for(i = 0; i < ioptr; i++) {
 		if(ioptr > conf.nswppo)
 			panic("executeio: ioptr %d > %d", ioptr, conf.nswppo);
-		out = iolist[i];
+		outp = iolist[i];
 
-		/* only write when swap address still referenced */
-		if(swapcount(out->daddr) > 1){
-			k = kmap(out);
+		assert(outp->ref > 0);
+		assert(outp->image == &swapimage);
+		assert(outp->daddr != ~0);
+
+		/* only write when swap address still in use */
+		if(swapcount(outp->daddr) > 1){
+			k = kmap(outp);
 			kaddr = (char*)VA(k);
 
 			if(waserror())
 				panic("executeio: page out I/O error");
 
-			n = devtab[c->type]->write(c, kaddr, BY2PG, out->daddr);
+			n = devtab[c->type]->write(c, kaddr, BY2PG, outp->daddr);
 			if(n != BY2PG)
 				nexterror();
 
@@ -411,10 +386,10 @@
 		}
 
 		/* drop our extra swap reference */
-		putswap((Page*)out->daddr);
+		putswap((Page*)outp->daddr);
 
 		/* Free up the page after I/O */
-		putpage(out);
+		putpage(outp);
 	}
 	ioptr = 0;
 }
@@ -432,7 +407,7 @@
 	Dir d;
 	int n;
 
-	if(swapimage.c) {
+	if(swapimage.c != nil) {
 		if(swapalloc.free != conf.nswap){
 			cclose(c);
 			error(Einuse);
@@ -460,10 +435,4 @@
 	}
 	c->flag &= ~CCACHE;
 	swapimage.c = c;
-}
-
-int
-swapfull(void)
-{
-	return swapalloc.free < conf.nswap/10;
 }
--- a/sys/src/9/port/sysproc.c
+++ b/sys/src/9/port/sysproc.c
@@ -125,7 +125,7 @@
 		nexterror();
 	}
 	for(i = 0; i < NSEG; i++)
-		if(up->seg[i])
+		if(up->seg[i] != nil)
 			p->seg[i] = dupseg(up->seg, i, n);
 	qunlock(&p->seglock);
 	poperror();
@@ -338,7 +338,7 @@
 	nargs = 0;
 	if(indir){
 		argp = progarg;
-		while(*argp){
+		while(*argp != nil){
 			a = *argp++;
 			nbytes += strlen(a) + 1;
 			nargs++;
@@ -402,7 +402,7 @@
 		argp = argp0;
 
 	for(i=0; i<nargs; i++){
-		if(indir && *argp==0) {
+		if(indir && *argp==nil) {
 			indir = 0;
 			argp = argp0;
 		}
@@ -436,13 +436,13 @@
 	for(i = SSEG; i <= BSEG; i++) {
 		putseg(up->seg[i]);
 		/* prevent a second free if we have an error */
-		up->seg[i] = 0;
+		up->seg[i] = nil;
 	}
 	for(i = ESEG+1; i < NSEG; i++) {
 		s = up->seg[i];
-		if(s != 0 && (s->type&SG_CEXEC) != 0) {
+		if(s != nil && (s->type&SG_CEXEC) != 0) {
 			putseg(s);
-			up->seg[i] = 0;
+			up->seg[i] = nil;
 		}
 	}
 
@@ -449,7 +449,7 @@
 	/*
 	 * Close on exec
 	 */
-	if((f = up->fgrp) != nil){
+	if((f = up->fgrp) != nil) {
 		for(i=0; i<=f->maxfd; i++)
 			fdclose(i, CCEXEC);
 	}
@@ -481,7 +481,7 @@
 	 * Move the stack
 	 */
 	s = up->seg[ESEG];
-	up->seg[ESEG] = 0;
+	up->seg[ESEG] = nil;
 	s->base = USTKTOP-USTKSIZE;
 	s->top = USTKTOP;
 	relocateseg(s, USTKTOP-tstk);
@@ -570,7 +570,7 @@
 
 	ms = va_arg(list, long);
 	if(ms <= 0) {
-		if (up->edf && (up->edf->flags & Admitted))
+		if (up->edf != nil && (up->edf->flags & Admitted))
 			edfyield();
 		else
 			yield();
@@ -597,7 +597,7 @@
 	char buf[ERRMAX];
 
 	status = va_arg(list, char*);
-	if(status){
+	if(status != nil){
 		if(waserror())
 			status = inval;
 		else{
@@ -714,7 +714,7 @@
 {
 	int (*f)(void*, char*);
 	f = va_arg(list, void*);
-	if(f != 0)
+	if(f != nil)
 		validaddr((uintptr)f, sizeof(void*), 0);
 	up->notify = f;
 	return 0;
@@ -723,7 +723,7 @@
 uintptr
 sysnoted(va_list list)
 {
-	if(va_arg(list, int) !=NRSTR && !up->notified)
+	if(va_arg(list, int) != NRSTR && !up->notified)
 		error(Egreg);
 	return 0;
 }
@@ -738,7 +738,7 @@
 	addr = va_arg(list, uintptr);
 	for(i = 0; i < NSEG; i++) {
 		s = up->seg[i];
-		if(s == 0 || addr < s->base || addr >= s->top)
+		if(s == nil || addr < s->base || addr >= s->top)
 			continue;
 		switch(s->type&SG_TYPE) {
 		case SG_TEXT:
@@ -783,14 +783,14 @@
 		nexterror();
 	}
 
-	s = 0;
+	s = nil;
 	for(i = 0; i < NSEG; i++)
-		if(s = up->seg[i]) {
-			qlock(&s->lk);
+		if((s = up->seg[i]) != nil) {
+			qlock(s);
 			if((addr >= s->base && addr < s->top) ||
 			   (s->top == s->base && addr == s->base))
 				goto found;
-			qunlock(&s->lk);
+			qunlock(s);
 		}
 
 	error(Ebadarg);
@@ -800,11 +800,11 @@
 	 * Check we are not detaching the initial stack segment.
 	 */
 	if(s == up->seg[SSEG]){
-		qunlock(&s->lk);
+		qunlock(s);
 		error(Ebadarg);
 	}
-	up->seg[i] = 0;
-	qunlock(&s->lk);
+	up->seg[i] = nil;
+	qunlock(s);
 	putseg(s);
 	qunlock(&up->seglock);
 	poperror();
@@ -830,12 +830,12 @@
 	from = PGROUND(from);
 
 	if(to > s->top) {
-		qunlock(&s->lk);
+		qunlock(s);
 		error(Ebadarg);
 	}
 
 	mfreeseg(s, from, (to - from) / BY2PG);
-	qunlock(&s->lk);
+	qunlock(s);
 	flushmmu();
 	return 0;
 }
@@ -858,7 +858,7 @@
 	l = &REND(up->rgrp, tag);
 
 	lock(up->rgrp);
-	for(p = *l; p; p = p->rendhash) {
+	for(p = *l; p != nil; p = p->rendhash) {
 		if(p->rendtag == tag) {
 			*l = p->rendhash;
 			val = p->rendval;