shithub: gefs

ref: 3878b725987e7429d21fb916672523ac81edb0ca
dir: /snap.c/

View raw version
#include <u.h>
#include <libc.h>
#include <fcall.h>
#include <avl.h>

#include "dat.h"
#include "fns.h"
#include "atomic.h"

static char*
dlsync(Dlist *dl)
{
	char kvbuf[512];
	Msg m;

	if(dl->ins == nil)
		return nil;
	if(checkflag(dl->ins, Bdirty))
		enqueue(dl->ins);

	dl->hd = dl->ins->bp;
	if(dl->hd.addr == dl->tl.addr)
		dl->tl = dl->hd;
	m.op = Oinsert;
	dlist2kv(dl, &m, kvbuf, sizeof(kvbuf));
	return btupsert(&fs->snap, &m, 1);
}

static void
dlcachedel(Dlist *dl, int hdel)
{
	uint h;
	Dlist *d, **p;

	h = ihash(dl->gen) ^ ihash(dl->bgen);
	if(hdel){
		p = &fs->dlcache[h % fs->dlcmax];
		for(d = *p; d != nil; d = d->chain){
			if(d->gen == dl->gen && d->bgen == dl->bgen)
				break;
			p = &d->chain;
		}
		if(d != nil)
			*p  = d->chain;
	}
	if(dl == fs->dlhead)
		fs->dlhead = dl->cnext;
	if(dl == fs->dltail)
		fs->dltail = dl->cprev;
	if(dl->cnext != nil)
		dl->cnext->cprev = dl->cprev;
	if(dl->cprev != nil)
		dl->cprev->cnext = dl->cnext;
	dl->cnext = nil;
	dl->cprev = nil;
}

static Dlist*
dlcacheget(vlong gen, vlong bgen)
{
	Dlist *dl;
	uint h;

	h = ihash(gen) ^ ihash(bgen);
	for(dl = fs->dlcache[h %fs->dlcmax]; dl != nil; dl = dl->chain)
		if(dl->gen == gen && dl->bgen == bgen)
			break;
	if(dl != nil)
		dlcachedel(dl, 0);
	return dl;
}

static Dlist*
getdl(vlong gen, vlong bgen)
{
	char kbuf[Dlksz], kvbuf[Dlkvpsz];
	Dlist *dl, **p;
	uint h;
	Msg m;
	Kvp kv;
	Key k;

	if((dl = dlcacheget(gen, bgen)) != nil)
		return dl;
	if((dl = mallocz(sizeof(Dlist), 1)) == nil)
		return nil;
	kbuf[0] = Kdlist;
	PACK64(kbuf+1, gen);
	PACK64(kbuf+9, bgen);
	k.k = kbuf;
	k.nk = sizeof(kbuf);

	/* load up existing dlist */
	if(btlookup(&fs->snap, &k, &kv, kvbuf, sizeof(kvbuf)) == nil){
		kv2dlist(&kv, dl);
		if(dl->hd.addr != -1 && (dl->ins = getblk(dl->hd, 0)) == nil){
			free(dl);
			return nil;
		}
		goto Found;
	}

	/* create a new one if it didn't exist */
	dl->gen = gen;
	dl->bgen = bgen;
	dl->hd.addr = -1;
	dl->tl.addr = -1;
	dl->ins = nil;

	m.op = Oinsert;
	dlist2kv(dl, &m, kvbuf, sizeof(kvbuf));
	if(btupsert(&fs->snap, &m, 1) != nil){
		free(dl);
		return nil;
	}
Found:
	h = ihash(gen) ^ ihash(bgen);
	p = &fs->dlcache[h % nelem(fs->dlcache)];
	dl->chain = *p;
	*p = dl;
	return dl;
}

void
putdl(Dlist *dl)
{
	Dlist *dt;

	dlcachedel(dl, 0);
	dl->cprev = nil;
	dl->cnext = fs->dlhead;
	fs->dlhead = dl;

	while(fs->ctail != nil && fs->dlcount >= fs->dlcmax){
		dt = fs->dltail;
		dlsync(dt);
		dlcachedel(dt, 1);
		dropblk(dt->ins);
		free(dt);
	}
}

static char*
freedl(Dlist *dl, int docontents)
{
	Bptr bp;
	Blk *b;
	char *p;

	bp = dl->hd;
	while(bp.addr != -1){
		/* ugly: when we merge deadlists, we change their hash */
		if((b = getblk(bp, GBnochk)) == nil)
			return Efs;
		if(docontents){
			for(p = b->data; p != b->data+b->deadsz; p += 8){
				if(blkdealloc(UNPACK64(p)) == -1){
					dropblk(b);
					return Efs;
				}
			}
		}
		bp = b->deadp;
		freeblk(&fs->snap, b);
		dropblk(b);
	}
	return nil;
}

static char*
mergedl(vlong succ, vlong gen, vlong bgen)
{
	char *err, buf[2][Kvmax];
	Dlist *d, *m;
	Msg msg[2];
	Blk *b;
//	vlong x;

	err = nil;
	d = getdl(gen, bgen);
	m = getdl(succ, bgen);
	if(d == nil || m == nil){
		err = Efs;
		goto Out;
	}
	if(m->ins == nil){
		m->hd = d->hd;
		m->tl = d->tl;
		m->ins = d->ins;
	}else{
		m->hd = d->hd;
		/* ugly: when we merge deadlists, we change their hash */
		if((b = getblk(d->tl, GBnochk)) == nil)
			goto Out;
		msg[0].op = Odelete;
		dlist2kv(d, &msg[0], buf[0], sizeof(buf[0]));
		msg[1].op = Oinsert;
		dlist2kv(m, &msg[1], buf[1], sizeof(buf[1]));
		if((err = btupsert(&fs->snap, msg, 2)) != nil)
			sysfatal("merge deadlists: %s", err);

		packbp(b->buf+2, Blksz, &m->hd);
		enqueue(b);
		dropblk(b);

// TODO: merge
//		if(m->ins->deadsz + d->ins->deadsz < Dlspc){
//			p = d->ins->data;
//			q = m->ins->data + m->ins->deadsz;
//			for(i = 0; i < d->ins->deadsz; i += 8){
//				m->ins->deadsz += 8;
//				x = UNPACK64(p);
//				PACK64(q, x);
//				p += 8;
//				q += 8;
//			}
//		}
	}
Out:
	if(d != nil)
		putdl(d);
	if(m != nil)
		putdl(m);
	return err;
}

vlong
successor(vlong gen)
{
	char *e, pfx[9];
	vlong id, succ;
	Scan s;

	pfx[0] = Kslink;
	succ = -1;
	PACK64(pfx+1, gen);
	btnewscan(&s, pfx, sizeof(pfx));
	if((e = btenter(&fs->snap, &s)) != nil)
		goto Err;
	if((e = btnext(&fs->snap, &s, &s.kv)) != nil)
		goto Err;
	if(!s.done)
		kv2link(&s.kv, &id, &succ);
	if((e = btnext(&fs->snap, &s, &s.kv)) != nil)
		goto Err;
	if(!s.done)
		sysfatal("multiple successors in cleanup");
	btexit(&fs->snap, &s);
	return succ;
Err:
	fprint(2, "error getting snap successor: %s", e);
	btexit(&fs->snap, &s);
	return -1;
}

static char*
reclaimblocks(vlong gen, vlong succ, vlong prev)
{
	char *e, pfx[9];
	Dlist dl;
	Scan s;
	Msg m;

	pfx[0] = Kdlist;
	PACK64(pfx+1, gen);
	btnewscan(&s, pfx, sizeof(pfx));
	if((e = btenter(&fs->snap, &s)) != nil)
		return e;
	while(1){
		if((e = btnext(&fs->snap, &s, &s.kv)) != nil)
			break;
		if(s.done)
			break;
		kv2dlist(&s.kv, &dl);

		/*
		 * delete the dlist before processing it; it's 
		 * better to leak blocks than to double free.
		 */
		m.op = Odelete;
		m.Kvp = s.kv;
		if((e = btupsert(&fs->snap, &m, 1)) != nil)
			break;
		if(succ == -1)
			e = freedl(&dl, 1);
		else if(dl.bgen > prev)
			e = freedl(&dl, 0);
		else
			e = mergedl(succ, dl.gen, dl.bgen);
		if(e != nil)
			break;
	}
	btexit(&fs->snap, &s);
	return e;
}

/*
 * Removes a label from a snapshot, allowing
 * it to be reclaimed if it is not a direct
 * predecessor of more than one other snapshot.
 *
 * If it has one successor and no label, then
 * it will be merged with that successor.
 */
char*
delsnap(Tree *t, vlong succ, char *name)
{
	char buf[2][Kvmax], *e;
	Msg m[2];
	int nm;

	nm = 0;
	if(name != nil){
		if(strcmp(name, "dump") == 0
		|| strcmp(name, "empty") == 0
		|| strcmp(name, "adm") == 0)
			return Ename;

		m[nm].op = Odelete;
		m[nm].k = buf[nm];
		if((e = packlabel(buf[nm], sizeof(buf[nm]), name)) == nil)
			return Ename;
		m[nm].nk = e - m[nm].k;
		m[nm].v = nil;
		m[nm].nv = 0;
		t->nlbl--;
		nm++;
	}
 
	if(t->nlbl == 0 && t->nsucc <= 1){
		m[nm].op = Odelete;
		m[nm].k = buf[nm];
		if((e = packsnap(buf[nm], sizeof(buf[nm]), t->gen)) == nil)
			return Ename;
		m[nm].nk = e - m[nm].k;
		m[nm].v = nil;
		m[nm].nv = 0;
		nm++;
	}else{
		m[nm].op = Oinsert;
		tree2kv(t, &m[nm], buf[nm], sizeof(buf[nm]));
		nm++;
	}
	if((e = flushdlcache(1)) != nil)
		return e;
	if((e = btupsert(&fs->snap, m, nm)) != nil)
		return e;
	if((e = reclaimblocks(t->gen, succ, t->prev)) != nil)
		return e;
	return nil;
}

/*
 * Attaches a label to a tree, incrementing
 * its reference count. This labelled snapshot
 * will show up in the dump.
 */
char*
labelsnap(Tree *t, char *name)
{
	char buf[2][Kvmax];
	Msg m[2];

	if(strcmp(name, "dump") == 0
	|| strcmp(name, "empty") == 0
	|| strcmp(name, "adm") == 0)
		return Ename;
	t->nlbl++;
	m[0].op = Oinsert;
	tree2kv(t, &m[0], buf[0], sizeof(buf[0]));
	m[1].op = Oinsert;
	lbl2kv(name, t->gen, &m[1], buf[1], sizeof(buf[1]));

	return btupsert(&fs->snap, m, 2);
}

/*
 * Updates a snapshot; keeps the generation the same if possible,
 * otherwise moves to a new generation. A snapshot may only stay
 * at the same generation as long as it is at the tip of a snapshot
 * list; once it's observable by a derived snapshot it must be
 * immutable.
 */
char*
updatesnap(Tree **r, Tree *o, char *lbl)
{
	char buf[4][Kvmax], *e;
	Msg m[4];
	Tree *t;

	if(!o->dirty)
		return nil;

	/* update the old kvp */
	o->nsucc++;
	o->nlbl--;
	m[0].op = Oinsert;
	tree2kv(o, &m[0], buf[0], sizeof(buf[0]));

	/* create the new one */
	if((t = mallocz(sizeof(Tree), 1)) == nil)
		return Enomem;
	t->memref = 1;
	t->dirty = 0;

	t->nlbl = 1;
	t->nsucc = 0;
	t->ht = o->ht;
	t->bp = o->bp;
	if(o->nlbl == 0 && o->nsucc == 1)
		t->prev = o->prev;
	else
		t->prev = o->gen;
	t->gen = aincv(&fs->nextgen, 1);

	m[1].op = Oinsert;
	tree2kv(t, &m[1], buf[1], sizeof(buf[1]));
	m[2].op = Oinsert;
	link2kv(t->prev, t->gen, &m[2], buf[2], sizeof(buf[2]));
	m[3].op = Oinsert;
	lbl2kv(lbl, t->gen, &m[3], buf[3], sizeof(buf[3]));
	if((e = btupsert(&fs->snap, m, 4)) != nil){
		free(t);
		return e;
	}

	/* only update the dirty status after we sync */
	o->dirty = 0;

	/* this was the last ref to the snap */
	if(o->nlbl == 0 && o->nsucc == 1)
		delsnap(o, t->gen, nil);
	closesnap(o);
	*r = t;
	return nil;
}

/*
 * open snapshot by label, returning a tree.
 */
Tree*
opensnap(char *label)
{
	char *p, buf[Kvmax];
	Tree *t;
	vlong gen;
	Kvp kv;
	Key k;

	/* Klabel{"name"} => Ksnap{id} */
	if((p = packlabel(buf, sizeof(buf), label)) == nil)
		return nil;
	k.k = buf;
	k.nk = p - buf;
	if(btlookup(&fs->snap, &k, &kv, buf, sizeof(buf)) != nil)
		return nil;
	if(kv.nv != Snapsz)
		abort();
	gen = UNPACK64(kv.v + 1);

	if((t = mallocz(sizeof(Tree), 1)) == nil)
		goto Error;
	if((p = packsnap(buf, sizeof(buf), gen)) == nil)
		goto Error;
	k.k = buf;
	k.nk = p - buf;
	if(btlookup(&fs->snap, &k, &kv, buf, sizeof(buf)) != nil)
		abort();
	if(unpacktree(t, kv.v, kv.nv) == nil)
		abort();
	ainc(&t->memref);
	return t;
Error:
	free(t);
	return nil;
}

/*
 * close snapshot, flushing and freeing in-memory
 * representation.
 */
void
closesnap(Tree *t)
{
	if(t == nil || adec(&t->memref) != 0)
		return;
	assert(!t->dirty);
	free(t);
}

char*
flushdlcache(int clear)
{
	Dlist *dl, *n;
	char *e;

	for(dl = fs->dlhead; dl != nil; dl = n){
		n = dl->cnext;
		if((e = dlsync(dl)) != nil)
			return e;
		if(clear){
			if(dl->ins != nil)
				dropblk(dl->ins);
			free(dl);
		}
	}
	if(clear){
		fs->dlhead = nil;
		fs->dltail = nil;
		memset(fs->dlcache, 0, fs->dlcmax*sizeof(Dlist*));
	}
	return nil;
}

/*
 * Marks a block as killed by the tree
 * t, which means that it will be free
 * for use after t is reclaimed.
 *
 * t must be an active snapshot with
 * no successors.
 */
int
killblk(Tree *t, Bptr bp)
{
	Dlist *dl;
	Blk *b;
	char *p;

	if((dl = getdl(t->gen, bp.gen)) == nil)
		return -1;
	if(dl->ins == nil || dl->ins->deadsz == Dlspc){
		if((b = newblk(&fs->snap, Tdlist)) == nil){
			putdl(dl);
			return -1;
		}
		if(dl->ins != nil){
			enqueue(dl->ins);
			/* enqueuing updates the hashes */
			if(dl->ins->bp.addr == dl->tl.addr)
				dl->tl = dl->ins->bp;
			b->deadp = dl->ins->bp;
		}else{
			dl->tl = b->bp;
			b->deadp = (Bptr){-1, -1, -1};
		}
		dl->hd = b->bp;
		dl->ins = b;
	}
	p = dl->ins->data + dl->ins->deadsz;
	dl->ins->flag |=  Bdirty;
	dl->ins->deadsz += 8;
	PACK64(p, bp.addr);
	putdl(dl);
	return 0;
}