ref: cb35d1a132f881aac7151174fdb82ec9ff84682e
parent: 8ac28ac11cb9e96329e4d083208b3f13ee8d2b8a
author: cinap_lenrek <[email protected]>
date: Sat Dec 20 23:46:22 EST 2014
kernel: avoid inconsistent reads in /proc/#/fd and /proc/#/ns to allow bytewise access to /proc/#/fd, the contents of the file where recreated on each call. if fd's had been closed or reassigned between the reads, the offset would be inconsistent and a read could start off in the middle of a line. this happens when you cat /proc/#/fd file of a busy process that mutates its filedescriptor table. to fix this, we now return one line record at a time. if the line fits in the read size, then this means the next read will always start at the beginning of the next line record. we remember the consumed byte count in Chan.mrock and the current record in Chan.nrock. (these fields are free to usefor non-directory files) if a read comes in and the offset is the same as c->mrock, we do not need to regenerate the file and just render the next c->nrock's record. for reads smaller than the line count, we have to regenerate the content up to the offset and the race is still possible, but this should not be the common case. the same algorithm is now used for /proc/#/ns file, allowing a simpler reimplementation and getting rid of Mntwalk state strcture.
--- a/sys/src/9/port/devdup.c
+++ b/sys/src/9/port/devdup.c
@@ -109,7 +109,7 @@
fd = twicefd/2;
if(twicefd & 1){
c = fdtochan(fd, -1, 0, 1);
- procfdprint(c, fd, 0, buf, sizeof buf);
+ procfdprint(c, fd, buf, sizeof buf);
cclose(c);
return readstr((ulong)offset, va, n, buf);
}
--- a/sys/src/9/port/devproc.c
+++ b/sys/src/9/port/devproc.c
@@ -155,7 +155,6 @@
int procctlmemio(Proc*, uintptr, int, void*, int);
Chan* proctext(Chan*, Proc*);
int procstopped(void*);
-void mntscan(Mntwalk*, Proc*);
ulong procpagecount(Proc *);
static Traceevent *tevents;
@@ -400,6 +399,7 @@
case Qkregs:
case Qsegment:
case Qprofile:
+ case Qns:
case Qfd:
if(omode != OREAD)
error(Eperm);
@@ -428,12 +428,6 @@
nonone(p);
break;
- case Qns:
- if(omode != OREAD)
- error(Eperm);
- c->aux = smalloc(sizeof(Mntwalk));
- break;
-
case Qnotepg:
nonone(p);
pg = p->pgrp;
@@ -514,103 +508,6 @@
return n;
}
-
-static long
-procoffset(long offset, char *va, int *np)
-{
- if(offset > 0) {
- offset -= *np;
- if(offset < 0) {
- memmove(va, va+*np+offset, -offset);
- *np = -offset;
- }
- else
- *np = 0;
- }
- return offset;
-}
-
-static int
-procqidwidth(Chan *c)
-{
- char buf[32];
-
- return sprint(buf, "%lud", c->qid.vers);
-}
-
-int
-procfdprint(Chan *c, int fd, int w, char *s, int ns)
-{
- int n;
-
- if(w == 0)
- w = procqidwidth(c);
- n = snprint(s, ns, "%3d %.2s %C %4ld (%.16llux %*lud %.2ux) %5ld %8lld %s\n",
- fd,
- &"r w rw"[(c->mode&3)<<1],
- devtab[c->type]->dc, c->dev,
- c->qid.path, w, c->qid.vers, c->qid.type,
- c->iounit, c->offset, c->path->s);
- return n;
-}
-
-static int
-procfds(Proc *p, char *va, int count, long offset)
-{
- Fgrp *f;
- Chan *c;
- char buf[256];
- int n, i, w, ww;
- char *a;
-
- /* print to buf to avoid holding fgrp lock while writing to user space */
- if(count > sizeof buf)
- count = sizeof buf;
- a = buf;
-
- eqlock(&p->debug);
- f = p->fgrp;
- if(f == nil || p->dot == nil){
- qunlock(&p->debug);
- return 0;
- }
- lock(f);
- if(waserror()){
- unlock(f);
- qunlock(&p->debug);
- nexterror();
- }
-
- n = readstr(0, a, count, p->dot->path->s);
- n += snprint(a+n, count-n, "\n");
- offset = procoffset(offset, a, &n);
- /* compute width of qid.path */
- w = 0;
- for(i = 0; i <= f->maxfd; i++) {
- c = f->fd[i];
- if(c == nil)
- continue;
- ww = procqidwidth(c);
- if(ww > w)
- w = ww;
- }
- for(i = 0; i <= f->maxfd; i++) {
- c = f->fd[i];
- if(c == nil)
- continue;
- n += procfdprint(c, i, w, a+n, count-n);
- offset = procoffset(offset, a, &n);
- }
- unlock(f);
- qunlock(&p->debug);
- poperror();
-
- /* copy result to user space, now that locks are released */
- memmove(va, buf, n);
-
- return n;
-}
-
static void
procclose(Chan *c)
{
@@ -622,31 +519,8 @@
proctrace = nil;
unlock(&tlock);
}
- if(QID(c->qid) == Qns && c->aux != nil){
- free(c->aux);
- c->aux = nil;
- }
}
-static void
-int2flag(int flag, char *s)
-{
- if(flag == 0){
- *s = '\0';
- return;
- }
- *s++ = '-';
- if(flag & MAFTER)
- *s++ = 'a';
- if(flag & MBEFORE)
- *s++ = 'b';
- if(flag & MCREATE)
- *s++ = 'c';
- if(flag & MCACHE)
- *s++ = 'C';
- *s = '\0';
-}
-
static int
procargs(Proc *p, char *buf, int nbuf)
{
@@ -690,6 +564,127 @@
return p->pid != PID(c->qid) || p->waitq != nil;
}
+static void
+int2flag(int flag, char *s)
+{
+ if(flag == 0){
+ *s = '\0';
+ return;
+ }
+ *s++ = '-';
+ if(flag & MAFTER)
+ *s++ = 'a';
+ if(flag & MBEFORE)
+ *s++ = 'b';
+ if(flag & MCREATE)
+ *s++ = 'c';
+ if(flag & MCACHE)
+ *s++ = 'C';
+ *s = '\0';
+}
+
+static int
+readns1(Chan *c, Proc *p, char *buf, int nbuf)
+{
+ Pgrp *pg;
+ Mount *t, *cm;
+ Mhead *f, *mh;
+ ulong minid, bestmid;
+ char flag[10], *srv;
+ int i;
+
+ pg = p->pgrp;
+ if(pg == nil || p->dot == nil || p->pid != PID(c->qid))
+ error(Eprocdied);
+
+ bestmid = ~0;
+ minid = c->nrock;
+ if(minid == bestmid)
+ return 0;
+
+ rlock(&pg->ns);
+
+ mh = nil;
+ cm = nil;
+ for(i = 0; i < MNTHASH; i++) {
+ for(f = pg->mnthash[i]; f != nil; f = f->hash) {
+ for(t = f->mount; t != nil; t = t->next) {
+ if(t->mountid >= minid && t->mountid < bestmid) {
+ bestmid = t->mountid;
+ cm = t;
+ mh = f;
+ }
+ }
+ }
+ }
+
+ if(bestmid == ~0) {
+ c->nrock = bestmid;
+ i = snprint(buf, nbuf, "cd %s\n", p->dot->path->s);
+ } else {
+ c->nrock = bestmid+1;
+
+ int2flag(cm->mflag, flag);
+ if(strcmp(cm->to->path->s, "#M") == 0){
+ srv = srvname(cm->to->mchan);
+ i = snprint(buf, nbuf, "mount %s %s %s %s\n", flag,
+ srv==nil? cm->to->mchan->path->s : srv,
+ mh->from->path->s, cm->spec? cm->spec : "");
+ free(srv);
+ }else{
+ i = snprint(buf, nbuf, "bind %s %s %s\n", flag,
+ cm->to->path->s, mh->from->path->s);
+ }
+ }
+
+ runlock(&pg->ns);
+
+ return i;
+}
+
+int
+procfdprint(Chan *c, int fd, char *s, int ns)
+{
+ return snprint(s, ns, "%3d %.2s %C %4ld (%.16llux %lud %.2ux) %5ld %8lld %s\n",
+ fd,
+ &"r w rw"[(c->mode&3)<<1],
+ devtab[c->type]->dc, c->dev,
+ c->qid.path, c->qid.vers, c->qid.type,
+ c->iounit, c->offset, c->path->s);
+}
+
+static int
+readfd1(Chan *c, Proc *p, char *buf, int nbuf)
+{
+ Fgrp *fg;
+ int n, i;
+
+ fg = p->fgrp;
+ if(fg == nil || p->dot == nil || p->pid != PID(c->qid))
+ return 0;
+
+ if(c->nrock == 0){
+ c->nrock = 1;
+ return snprint(buf, nbuf, "%s\n", p->dot->path->s);
+ }
+
+ lock(fg);
+ n = 0;
+ for(;;){
+ i = c->nrock-1;
+ if(i < 0 || i > fg->maxfd)
+ break;
+ c->nrock++;
+ if(fg->fd[i] != nil){
+ n = procfdprint(fg->fd[i], i, buf, nbuf);
+ break;
+ }
+ }
+ unlock(fg);
+
+ return n;
+}
+
/*
* userspace can't pass negative file offset for a
* 64 bit kernel address, so we use 63 bit and sign
@@ -706,15 +701,13 @@
static long
procread(Chan *c, void *va, long n, vlong off)
{
- /* NSEG*32 was too small for worst cases */
- char *a, flag[10], *sps, *srv, statbuf[NSEG*64];
- int i, j, m, navail, ne, rsize;
+ char *a, *sps, statbuf[1024];
+ int i, j, navail, ne, rsize;
long l;
uchar *rptr;
uintptr addr;
ulong offset;
Confmem *cm;
- Mntwalk *mw;
Proc *p;
Segment *sg, *s;
Ureg kur;
@@ -753,14 +746,16 @@
switch(QID(c->qid)){
case Qargs:
eqlock(&p->debug);
- j = procargs(p, up->genbuf, sizeof up->genbuf);
+ j = procargs(p, statbuf, sizeof(statbuf));
qunlock(&p->debug);
if(offset >= j)
return 0;
if(offset+n > j)
n = j-offset;
- memmove(a, &up->genbuf[offset], n);
+ statbufread:
+ memmove(a, statbuf+offset, n);
return n;
+
case Qsyscall:
eqlock(&p->debug);
if(waserror()){
@@ -769,13 +764,12 @@
}
if(p->pid != PID(c->qid))
error(Eprocdied);
+ j = 0;
if(p->syscalltrace != nil)
- n = readstr(offset, a, n, p->syscalltrace);
- else
- n = 0;
+ j = readstr(offset, a, n, p->syscalltrace);
qunlock(&p->debug);
poperror();
- return n;
+ return j;
case Qmem:
addr = off2addr(off);
@@ -830,17 +824,15 @@
if(p->nnote == 0)
n = 0;
else {
- m = strlen(p->note[0].msg) + 1;
- if(m > n)
- m = n;
- memmove(va, p->note[0].msg, m-1);
- ((char*)va)[m-1] = '\0';
- p->nnote--;
+ i = strlen(p->note[0].msg) + 1;
+ if(i < n)
+ n = i;
+ memmove(a, p->note[0].msg, n-1);
+ a[n-1] = '\0';
+ if(--p->nnote == 0)
+ p->notepending = 0;
memmove(p->note, p->note+1, p->nnote*sizeof(Note));
- n = m;
}
- if(p->nnote == 0)
- p->notepending = 0;
poperror();
qunlock(&p->debug);
return n;
@@ -905,8 +897,7 @@
readnum(0, statbuf+j+NUMSIZE*6, NUMSIZE, procpagecount(p)*BY2PG/1024, NUMSIZE);
readnum(0, statbuf+j+NUMSIZE*7, NUMSIZE, p->basepri, NUMSIZE);
readnum(0, statbuf+j+NUMSIZE*8, NUMSIZE, p->priority, NUMSIZE);
- memmove(a, statbuf+offset, n);
- return n;
+ goto statbufread;
case Qsegment:
j = 0;
@@ -924,10 +915,7 @@
return 0;
if(offset+n > j)
n = j-offset;
- if(n == 0 && offset == 0)
- exhausted("segments");
- memmove(a, &statbuf[offset], n);
- return n;
+ goto statbufread;
case Qwait:
if(!canqlock(&p->qwaitr))
@@ -959,96 +947,55 @@
qunlock(&p->qwaitr);
poperror();
- n = snprint(a, n, "%d %lud %lud %lud %q",
+
+ j = snprint(statbuf, sizeof(statbuf), "%d %lud %lud %lud %q",
wq->w.pid,
wq->w.time[TUser], wq->w.time[TSys], wq->w.time[TReal],
wq->w.msg);
free(wq);
- return n;
+ if(j < n)
+ n = j;
+ offset = 0;
+ goto statbufread;
case Qns:
+ case Qfd:
eqlock(&p->debug);
if(waserror()){
qunlock(&p->debug);
nexterror();
}
- if(p->pgrp == nil || p->dot == nil || p->pid != PID(c->qid))
- error(Eprocdied);
- mw = c->aux;
- if(mw->cddone){
- qunlock(&p->debug);
- poperror();
- return 0;
- }
- mntscan(mw, p);
- if(mw->mh == nil){
- mw->cddone = 1;
- i = snprint(a, n, "cd %s\n", p->dot->path->s);
- qunlock(&p->debug);
- poperror();
- return i;
- }
- int2flag(mw->cm->mflag, flag);
- if(strcmp(mw->cm->to->path->s, "#M") == 0){
- srv = srvname(mw->cm->to->mchan);
- i = snprint(a, n, "mount %s %s %s %s\n", flag,
- srv==nil? mw->cm->to->mchan->path->s : srv,
- mw->mh->from->path->s, mw->cm->spec? mw->cm->spec : "");
- free(srv);
- }else
- i = snprint(a, n, "bind %s %s %s\n", flag,
- mw->cm->to->path->s, mw->mh->from->path->s);
+ if(offset == 0 || offset != c->mrock)
+ c->nrock = c->mrock = 0;
+ do {
+ if(QID(c->qid) == Qns)
+ j = readns1(c, p, statbuf, sizeof(statbuf));
+ else
+ j = readfd1(c, p, statbuf, sizeof(statbuf));
+ if(j == 0)
+ break;
+ c->mrock += j;
+ } while(c->mrock <= offset);
+ i = c->mrock - offset;
qunlock(&p->debug);
poperror();
- return i;
+ if(i <= 0)
+ return 0;
+ if(i < n)
+ n = i;
+ offset = j - i;
+ goto statbufread;
+
case Qnoteid:
return readnum(offset, va, n, p->noteid, NUMSIZE);
+
case Qppid:
return readnum(offset, va, n, p->parentpid, NUMSIZE);
- case Qfd:
- return procfds(p, va, n, offset);
+
}
error(Egreg);
return 0; /* not reached */
-}
-
-void
-mntscan(Mntwalk *mw, Proc *p)
-{
- Pgrp *pg;
- Mount *t;
- Mhead *f;
- int nxt, i;
- ulong last, bestmid;
-
- pg = p->pgrp;
- rlock(&pg->ns);
-
- nxt = 0;
- bestmid = ~0;
-
- last = 0;
- if(mw->mh != nil)
- last = mw->cm->mountid;
-
- for(i = 0; i < MNTHASH; i++) {
- for(f = pg->mnthash[i]; f != nil; f = f->hash) {
- for(t = f->mount; t != nil; t = t->next) {
- if(mw->mh == nil ||
- (t->mountid > last && t->mountid < bestmid)) {
- mw->cm = t;
- mw->mh = f;
- bestmid = mw->cm->mountid;
- nxt = 1;
- }
- }
- }
- }
- if(nxt == 0)
- mw->mh = nil;
-
- runlock(&pg->ns);
}
static long
--- a/sys/src/9/port/portdat.h
+++ b/sys/src/9/port/portdat.h
@@ -17,7 +17,6 @@
typedef struct Mntcache Mntcache;
typedef struct Mount Mount;
typedef struct Mntrpc Mntrpc;
-typedef struct Mntwalk Mntwalk;
typedef struct Mnt Mnt;
typedef struct Mhead Mhead;
typedef struct Note Note;
@@ -240,13 +239,6 @@
NSMAX = 1000,
NSLOG = 7,
NSCACHE = (1<<NSLOG),
-};
-
-struct Mntwalk /* state for /proc/#/ns */
-{
- int cddone;
- Mhead* mh;
- Mount* cm;
};
struct Mount
--- a/sys/src/9/port/portfns.h
+++ b/sys/src/9/port/portfns.h
@@ -224,7 +224,7 @@
ulong procalarm(ulong);
void procctl(void);
void procdump(void);
-int procfdprint(Chan*, int, int, char*, int);
+int procfdprint(Chan*, int, char*, int);
void procflushseg(Segment*);
int procindex(ulong);
void procinit0(void);