ref: 8a0a5128be9870261845756ad9b924a613829f7b
dir: /snap.c/
#include <u.h> #include <libc.h> #include <fcall.h> #include <avl.h> #include "dat.h" #include "fns.h" #include "atomic.h" static char* dlsync(Dlist *dl) { char kvbuf[512]; Msg m; if(dl->ins == nil) return nil; if(checkflag(dl->ins, Bdirty)) enqueue(dl->ins); dl->hd = dl->ins->bp; if(dl->hd.addr == dl->tl.addr) dl->tl = dl->hd; m.op = Oinsert; dlist2kv(dl, &m, kvbuf, sizeof(kvbuf)); return btupsert(&fs->snap, &m, 1); } static void dlcachedel(Dlist *dl, int hdel) { uint h; Dlist *d, **p; h = ihash(dl->gen) ^ ihash(dl->bgen); if(hdel){ p = &fs->dlcache[h % fs->dlcmax]; for(d = *p; d != nil; d = d->chain){ if(d->gen == dl->gen && d->bgen == dl->bgen) break; p = &d->chain; } if(d != nil) *p = d->chain; } if(dl == fs->dlhead) fs->dlhead = dl->cnext; if(dl == fs->dltail) fs->dltail = dl->cprev; if(dl->cnext != nil) dl->cnext->cprev = dl->cprev; if(dl->cprev != nil) dl->cprev->cnext = dl->cnext; dl->cnext = nil; dl->cprev = nil; } static Dlist* dlcacheget(vlong gen, vlong bgen) { Dlist *dl; uint h; h = ihash(gen) ^ ihash(bgen); for(dl = fs->dlcache[h % nelem(fs->dlcache)]; dl != nil; dl = dl->chain) if(dl->gen == gen && dl->bgen == bgen) break; if(dl != nil) dlcachedel(dl, 0); return dl; } static Dlist* getdl(vlong gen, vlong bgen) { char kbuf[Dlksz], kvbuf[Dlkvpsz]; Dlist *dl, **p; uint h; Msg m; Kvp kv; Key k; if((dl = dlcacheget(gen, bgen)) != nil) return dl; if((dl = mallocz(sizeof(Dlist), 1)) == nil) return nil; kbuf[0] = Kdlist; PACK64(kbuf+1, gen); PACK64(kbuf+9, bgen); k.k = kbuf; k.nk = sizeof(kbuf); /* load up existing dlist */ if(btlookup(&fs->snap, &k, &kv, kvbuf, sizeof(kvbuf)) == nil){ kv2dlist(&kv, dl); if(dl->hd.addr != -1 && (dl->ins = getblk(dl->hd, 0)) == nil){ free(dl); return nil; } goto Found; } /* create a new one if it didn't exist */ dl->gen = gen; dl->bgen = bgen; dl->hd.addr = -1; dl->tl.addr = -1; dl->ins = nil; m.op = Oinsert; dlist2kv(dl, &m, kvbuf, sizeof(kvbuf)); if(btupsert(&fs->snap, &m, 1) != nil){ free(dl); return nil; } Found: h = ihash(gen) ^ ihash(bgen); p = &fs->dlcache[h % nelem(fs->dlcache)]; dl->chain = *p; *p = dl; return dl; } void putdl(Dlist *dl) { Dlist *dt; dlcachedel(dl, 0); dl->cprev = nil; dl->cnext = fs->dlhead; fs->dlhead = dl; while(fs->ctail != nil && fs->dlcount >= fs->dlcmax){ dt = fs->dltail; dlsync(dt); dlcachedel(dt, 1); dropblk(dt->ins); free(dt); } } static char* freedl(Dlist *dl, int docontents) { Bptr bp; Blk *b; char *p; bp = dl->hd; while(bp.addr != -1){ /* ugly: when we merge deadlists, we change their hash */ if((b = getblk(bp, GBnochk)) == nil) return Efs; if(docontents){ for(p = b->data; p != b->data+b->deadsz; p += 8){ if(blkdealloc(UNPACK64(p)) == -1){ dropblk(b); return Efs; } } } bp = b->deadp; freeblk(&fs->snap, b); dropblk(b); } return nil; } static char* mergedl(vlong succ, vlong gen, vlong bgen) { char *err, buf[2][Kvmax]; Dlist *d, *m; Msg msg[2]; Blk *b; // vlong x; err = nil; d = getdl(gen, bgen); m = getdl(succ, bgen); if(d == nil || m == nil){ err = Efs; goto Out; } if(m->ins == nil){ m->hd = d->hd; m->tl = d->tl; m->ins = d->ins; }else{ m->hd = d->hd; /* ugly: when we merge deadlists, we change their hash */ if((b = getblk(d->tl, GBnochk)) == nil) goto Out; msg[0].op = Odelete; dlist2kv(d, &msg[0], buf[0], sizeof(buf[0])); msg[1].op = Oinsert; dlist2kv(m, &msg[1], buf[1], sizeof(buf[1])); if((err = btupsert(&fs->snap, msg, 2)) != nil) sysfatal("merge deadlists: %s", err); packbp(b->buf+2, Blksz, &m->hd); enqueue(b); dropblk(b); // TODO: merge // if(m->ins->deadsz + d->ins->deadsz < Dlspc){ // p = d->ins->data; // q = m->ins->data + m->ins->deadsz; // for(i = 0; i < d->ins->deadsz; i += 8){ // m->ins->deadsz += 8; // x = UNPACK64(p); // PACK64(q, x); // p += 8; // q += 8; // } // } } Out: if(d != nil) putdl(d); if(m != nil) putdl(m); return err; } static vlong successor(vlong gen) { char *e, pfx[9]; vlong id, succ; Scan s; pfx[0] = Kslink; succ = -1; PACK64(pfx+1, gen); btnewscan(&s, pfx, sizeof(pfx)); if((e = btenter(&fs->snap, &s)) != nil) goto Err; if((e = btnext(&fs->snap, &s, &s.kv)) != nil) goto Err; if(!s.done) kv2link(&s.kv, &id, &succ); if((e = btnext(&fs->snap, &s, &s.kv)) != nil) goto Err; if(!s.done) sysfatal("multiple successors in cleanup"); btexit(&fs->snap, &s); return succ; Err: fprint(2, "error getting snap successor: %s", e); btexit(&fs->snap, &s); return -1; } static char* reclaimblocks(vlong gen, vlong prev) { char *e, pfx[9]; vlong succ; Dlist dl; Scan s; Msg m; succ = successor(gen); pfx[0] = Kdlist; if(succ == -1) PACK64(pfx+1, gen); else PACK64(pfx+1, succ); btnewscan(&s, pfx, sizeof(pfx)); if((e = btenter(&fs->snap, &s)) != nil) return e; while(1){ if((e = btnext(&fs->snap, &s, &s.kv)) != nil) break; if(s.done) break; kv2dlist(&s.kv, &dl); /* * delete the dlist before processing it; it's * better to leak blocks than to double free. */ m.op = Odelete; m.Kvp = s.kv; if((e = btupsert(&fs->snap, &m, 1)) != nil) break; if(succ == -1) e = freedl(&dl, 1); else if(dl.bgen > prev) e = freedl(&dl, 0); else e = mergedl(succ, dl.gen, dl.bgen); if(e != nil) break; } btexit(&fs->snap, &s); return e; } /* * Removes a label from a snapshot, allowing * it to be reclaimed if it is not a direct * predecessor of more than one other snapshot. * * If it has one successor and no label, then * it will be merged with that successor. */ char* delsnap(Tree *t, char *name) { char buf[2][Kvmax], *e; Msg m[2]; int nm; if(strcmp(name, "dump") == 0 || strcmp(name, "empty") == 0) return Ename; nm = 0; m[nm].op = Odelete; m[nm].k = buf[nm]; if((e = packlabel(buf[nm], sizeof(buf[nm]), name)) == nil) return Ename; m[nm].nk = e - m[nm].k; m[nm].v = nil; m[nm].nv = 0; nm++; t->nlbl--; if(t->nlbl == 0 && t->nsucc <= 1){ m[nm].op = Odelete; m[nm].k = buf[nm]; if((e = packsnap(buf[nm], sizeof(buf[nm]), t->gen)) == nil) return Ename; m[nm].nk = e - m[nm].k; m[nm].v = nil; m[nm].nv = 0; nm++; }else{ m[nm].op = Oinsert; tree2kv(t, &m[nm], buf[nm], sizeof(buf[nm])); nm++; } if((e = flushdlcache(1)) != nil) return e; if((e = btupsert(&fs->snap, m, nm)) != nil) return e; if((e = reclaimblocks(t->gen, t->prev)) != nil) return e; return nil; } /* * Attaches a label to a tree, incrementing * its reference count. This labelled snapshot * will show up in the dump. */ char* labelsnap(Tree *t, char *name) { char buf[2][Kvmax]; Msg m[2]; if(strcmp(name, "dump") == 0 || strcmp(name, "empty") == 0) return Ename; t->nlbl++; m[0].op = Oinsert; tree2kv(t, &m[0], buf[0], sizeof(buf[0])); m[1].op = Oinsert; lbl2kv(name, t->gen, &m[1], buf[1], sizeof(buf[1])); return btupsert(&fs->snap, m, 2); } /* * Updates a snapshot; keeps the generation the same if possible, * otherwise moves to a new generation. A snapshot may only stay * at the same generation as long as it is at the tip of a snapshot * list; once it's observable by a derived snapshot it must be * immutable. */ char* updatesnap(Tree **r, Tree *o, char *lbl) { char buf[4][Kvmax], *e; Msg m[4]; Tree *t; if(!o->dirty) return nil; /* this snap can be modified in place, so do that */ if(o->nlbl == 1 && o->nsucc == 0){ m[0].op = Oinsert; tree2kv(o, &m[0], buf[0], sizeof(buf[0])); if((e = btupsert(&fs->snap, &m[0], 1)) != nil) return e; o->dirty = 0; return nil; } /* update the old kvp */ o->nsucc++; o->nlbl--; m[0].op = Oinsert; tree2kv(o, &m[0], buf[0], sizeof(buf[0])); /* create the new one */ if((t = mallocz(sizeof(Tree), 1)) == nil) return Enomem; t->memref = 1; t->dirty = 0; t->nlbl = 1; t->nsucc = 0; t->ht = o->ht; t->bp = o->bp; t->prev = o->gen; t->gen = aincv(&fs->nextgen, 1); m[1].op = Oinsert; tree2kv(t, &m[1], buf[1], sizeof(buf[1])); m[2].op = Oinsert; link2kv(t->prev, t->gen, &m[2], buf[2], sizeof(buf[2])); m[3].op = Oinsert; lbl2kv(lbl, t->gen, &m[3], buf[3], sizeof(buf[3])); if((e = btupsert(&fs->snap, m, 4)) != nil){ free(t); return e; } /* only update the dirty status after we sync */ o->dirty = 0; closesnap(o); *r = t; return nil; } /* * open snapshot by label, returning a tree. */ Tree* opensnap(char *label) { char *p, buf[Kvmax]; Tree *t; vlong gen; Kvp kv; Key k; /* Klabel{"name"} => Ksnap{id} */ if((p = packlabel(buf, sizeof(buf), label)) == nil) return nil; k.k = buf; k.nk = p - buf; if(btlookup(&fs->snap, &k, &kv, buf, sizeof(buf)) != nil) return nil; if(kv.nv != Snapsz) abort(); gen = UNPACK64(kv.v + 1); if((t = mallocz(sizeof(Tree), 1)) == nil) goto Error; if((p = packsnap(buf, sizeof(buf), gen)) == nil) goto Error; k.k = buf; k.nk = p - buf; if(btlookup(&fs->snap, &k, &kv, buf, sizeof(buf)) != nil) abort(); if(unpacktree(t, kv.v, kv.nv) == nil) abort(); ainc(&t->memref); return t; Error: free(t); return nil; } /* * close snapshot, flushing and freeing in-memory * representation. */ void closesnap(Tree *t) { if(t == nil || adec(&t->memref) != 0) return; assert(!t->dirty); free(t); } char* flushdlcache(int clear) { Dlist *dl, *n; char *e; for(dl = fs->dlhead; dl != nil; dl = n){ n = dl->cnext; if((e = dlsync(dl)) != nil) return e; if(clear){ if(dl->ins != nil) dropblk(dl->ins); free(dl); } } if(clear){ fs->dlhead = nil; fs->dltail = nil; memset(fs->dlcache, 0, fs->dlcmax*sizeof(Dlist*)); } return nil; } /* * Marks a block as killed by the tree * t, which means that it will be free * for use after t is reclaimed. * * t must be an active snapshot with * no successors. */ int killblk(Tree *t, Bptr bp) { Dlist *dl; Blk *b; char *p; if((dl = getdl(t->gen, bp.gen)) == nil) return -1; if(dl->ins == nil || dl->ins->deadsz == Dlspc){ if((b = newblk(&fs->snap, Tdlist)) == nil){ putdl(dl); return -1; } if(dl->ins != nil){ enqueue(dl->ins); /* enqueuing updates the hashes */ if(dl->ins->bp.addr == dl->tl.addr) dl->tl = dl->ins->bp; b->deadp = dl->ins->bp; }else{ dl->tl = b->bp; b->deadp = (Bptr){-1, -1, -1}; } dl->hd = b->bp; dl->ins = b; } p = dl->ins->data + dl->ins->deadsz; dl->ins->flag |= Bdirty; dl->ins->deadsz += 8; PACK64(p, bp.addr); putdl(dl); return 0; }