ref: 4a80d9d029891e056a7badeeea8e3b588efd694b
parent: f7c60230669e8e00bc794f07726070d577d5aa3f
author: cinap_lenrek <[email protected]>
date: Sun Feb 23 13:00:21 EST 2020
kernel: fix multiple devproc bugs and pid reuse issues devproc assumes that when we hold the Proc.debug qlock, the process will be prevented from exiting. but there is another race where the process has already exited and the Proc* slot gets reused. to solve this, on process creation we also have to acquire the debug qlock while initializing the fields of the process. this also means newproc() should only initialize fields *not* protected by the debug qlock. always acquire the Proc.debug qlock when changing strings in the proc structure to avoid doublefree on concurrent update. for changing the user string, we add a procsetuser() function that does this for auth.c and devcap. remove pgrpnote() from pgrp.c and replace by static postnotepg() in devproc. avoid the assumption that the Proc* entries returned by proctab() are continuous. fixed devproc permission issues: - make sure only eve can access /proc/trace - none should only be allowed to read its own /proc/n/text - move Proc.kp checks into procopen() pid reuse was not handled correctly, as we where only checking if a pid had a living process, but there still could be processes expecting a particular parentpid or noteid. this is now addressed with reference counted Pid structures which are organized in a hash table. read access to the hash table does not require locks which will be usefull for dtracy later.
--- a/sys/src/9/port/auth.c
+++ b/sys/src/9/port/auth.c
@@ -117,8 +117,7 @@
{
if(n!=4 || strncmp(a, "none", 4)!=0)
error(Eperm);
- kstrdup(&up->user, "none");
- up->basepri = PriNormal;
+ procsetuser(up, "none");
return n;
}
@@ -130,12 +129,14 @@
long
hostownerwrite(char *a, int n)
{
- char buf[128];
+ char buf[KNAMELEN];
if(!iseve())
error(Eperm);
- if(n <= 0 || n >= sizeof buf)
+ if(n <= 0)
error(Ebadarg);
+ if(n >= sizeof buf)
+ error(Etoolong);
memmove(buf, a, n);
buf[n] = 0;
@@ -143,8 +144,7 @@
srvrenameuser(eve, buf);
shrrenameuser(eve, buf);
kstrdup(&eve, buf);
- kstrdup(&up->user, buf);
- up->basepri = PriNormal;
+ procsetuser(up, buf);
return n;
}
--- a/sys/src/9/port/devcap.c
+++ b/sys/src/9/port/devcap.c
@@ -246,9 +246,7 @@
}
secfree(p);
- /* set user id */
- kstrdup(&up->user, to);
- up->basepri = PriNormal;
+ procsetuser(up, to);
secfree(cp);
poperror();
--- a/sys/src/9/port/devproc.c
+++ b/sys/src/9/port/devproc.c
@@ -74,7 +74,7 @@
Emask = Nevents - 1,
};
-#define STATSIZE (2*KNAMELEN+12+9*12)
+#define STATSIZE (2*28+12+9*12)
/*
* Status, fd, and ns are left fully readable (0444) because of their use in debugging,
* particularly on shared servers.
@@ -145,7 +145,6 @@
* 23 bits of process slot number + 1
* in vers,
* 32 bits of pid, for consistency checking
- * If notepg, c->pgrpid.path is pgrp slot, .vers is noteid.
*/
#define QSHIFT 5 /* location in qid of proc slot # */
@@ -186,7 +185,7 @@
if(s == 0){
strcpy(up->genbuf, "trace");
mkqid(&qid, Qtrace, -1, QTFILE);
- devdir(c, qid, up->genbuf, 0, eve, 0444, dp);
+ devdir(c, qid, up->genbuf, 0, eve, 0400, dp);
return 1;
}
@@ -206,10 +205,10 @@
pid = p->pid;
if(pid == 0)
return 0;
- sprint(up->genbuf, "%lud", pid);
/*
* String comparison is done in devwalk so name must match its formatted pid
*/
+ snprint(up->genbuf, sizeof(up->genbuf), "%lud", pid);
if(name != nil && strcmp(name, up->genbuf) != 0)
return -1;
mkqid(&qid, (s+1)<<QSHIFT, pid, QTDIR);
@@ -219,7 +218,7 @@
if(c->qid.path == Qtrace){
strcpy(up->genbuf, "trace");
mkqid(&qid, Qtrace, -1, QTFILE);
- devdir(c, qid, up->genbuf, 0, eve, 0444, dp);
+ devdir(c, qid, up->genbuf, 0, eve, 0400, dp);
return 1;
}
if(s >= nelem(procdir))
@@ -250,8 +249,6 @@
len *= sizeof(*q->profile);
}
break;
- }
- switch(QID(tab->qid)){
case Qwatchpt:
len = lenwatchpt(p);
break;
@@ -324,6 +321,52 @@
error(Eperm);
}
+static void
+changenoteid(Proc *p, ulong noteid)
+{
+ Proc *pp;
+ int i;
+
+ if(noteid <= 0)
+ error(Ebadarg);
+ if(noteid == p->noteid)
+ return;
+ if(noteid == p->pid){
+ setnoteid(p, noteid);
+ return;
+ }
+ for(i = 0; i < conf.nproc; i++){
+ pp = proctab(i);
+ if(pp->noteid != noteid || pp->kp)
+ continue;
+ if(strcmp(pp->user, p->user) == 0){
+ nonone(pp);
+ setnoteid(p, noteid);
+ return;
+ }
+ }
+ error(Eperm);
+}
+
+static void
+postnotepg(ulong noteid, char *n, int flag)
+{
+ Proc *p;
+ int i;
+
+ for(i = 0; i < conf.nproc; i++){
+ p = proctab(i);
+ if(p == up)
+ continue;
+ if(p->noteid != noteid || p->kp)
+ continue;
+ qlock(&p->debug);
+ if(p->noteid == noteid)
+ postnote(p, 0, n, flag);
+ qunlock(&p->debug);
+ }
+}
+
static void clearwatchpt(Proc *p);
static Chan*
@@ -330,7 +373,6 @@
procopen(Chan *c, int omode0)
{
Proc *p;
- Pgrp *pg;
Chan *tc;
int pid;
int omode;
@@ -339,7 +381,7 @@
return devopen(c, omode0, 0, 0, procgen);
if(QID(c->qid) == Qtrace){
- if (omode0 != OREAD)
+ if (omode0 != OREAD || !iseve())
error(Eperm);
lock(&tlock);
if (waserror()){
@@ -381,6 +423,7 @@
case Qtext:
if(omode != OREAD)
error(Eperm);
+ nonone(p);
qunlock(&p->debug);
poperror();
tc = proctext(c, p);
@@ -388,10 +431,11 @@
cclose(c);
return tc;
+ case Qstatus:
+ case Qppid:
case Qproc:
case Qkregs:
case Qsegment:
- case Qprofile:
case Qns:
case Qfd:
if(omode != OREAD)
@@ -398,39 +442,31 @@
error(Eperm);
break;
+ case Qargs:
+ case Qwait:
+ case Qnoteid:
+ if(omode == OREAD)
+ break;
+ case Qctl:
case Qnote:
- if(p->privatemem)
+ if(p->kp)
error(Eperm);
break;
- case Qmem:
- case Qctl:
- if(p->privatemem)
+ case Qnotepg:
+ if(p->kp || omode != OWRITE)
error(Eperm);
- nonone(p);
+ pid = p->noteid;
break;
- case Qargs:
- case Qnoteid:
- case Qstatus:
- case Qwait:
+ case Qmem:
case Qregs:
case Qfpregs:
+ case Qprofile:
case Qsyscall:
- case Qppid:
case Qwatchpt:
- nonone(p);
- break;
-
- case Qnotepg:
- nonone(p);
- pg = p->pgrp;
- if(pg == nil)
- error(Eprocdied);
- if(omode!=OWRITE)
+ if(p->kp || p->privatemem)
error(Eperm);
- c->pgrpid.path = pg->pgrpid+1;
- c->pgrpid.vers = p->noteid;
break;
default:
@@ -437,15 +473,12 @@
print("procopen %#lux\n", QID(c->qid));
error(Egreg);
}
+ nonone(p);
/* Affix pid to qid */
- if(p->state != Dead)
- c->qid.vers = p->pid;
-
- /* make sure the process slot didn't get reallocated while we were playing */
- coherence();
- if(p->pid != pid)
+ if(pid == 0)
error(Eprocdied);
+ c->qid.vers = pid;
tc = devopen(c, omode, 0, 0, procgen);
if(waserror()){
@@ -470,23 +503,31 @@
static int
procwstat(Chan *c, uchar *db, int n)
{
- Proc *p;
Dir *d;
+ Proc *p;
if(c->qid.type&QTDIR)
error(Eperm);
- if(QID(c->qid) == Qtrace)
+ switch(QID(c->qid)){
+ case Qnotepg:
+ case Qtrace:
return devwstat(c, db, n);
+ }
- p = proctab(SLOT(c->qid));
- nonone(p);
- d = nil;
+ d = smalloc(sizeof(Dir)+n);
+ if(waserror()){
+ free(d);
+ nexterror();
+ }
+ n = convM2D(db, n, &d[0], (char*)&d[1]);
+ if(n == 0)
+ error(Eshortstat);
+ p = proctab(SLOT(c->qid));
eqlock(&p->debug);
if(waserror()){
qunlock(&p->debug);
- free(d);
nexterror();
}
@@ -493,13 +534,10 @@
if(p->pid != PID(c->qid))
error(Eprocdied);
+ nonone(p);
if(strcmp(up->user, p->user) != 0 && !iseve())
error(Eperm);
- d = smalloc(sizeof(Dir)+n);
- n = convM2D(db, n, &d[0], (char*)&d[1]);
- if(n == 0)
- error(Eshortstat);
if(!emptystr(d->uid) && strcmp(d->uid, p->user) != 0){
if(!iseve())
error(Eperm);
@@ -511,6 +549,7 @@
qunlock(&p->debug);
poperror();
+ poperror();
free(d);
return n;
}
@@ -551,10 +590,8 @@
int n;
a = p->args;
- if(p->setargs){
- snprint(buf, nbuf, "%s [%s]", p->text, p->args);
- return strlen(buf);
- }
+ if(p->setargs)
+ return snprint(buf, nbuf, "%s [%s]", p->text, p->args);
n = p->nargs;
for(j = 0; j < nbuf - 1; j += m){
if(n <= 0)
@@ -836,24 +873,23 @@
static long
procread(Chan *c, void *va, long n, vlong off)
{
- char *a, *sps, statbuf[1024];
- int i, j, navail, ne, rsize;
- ulong l;
+ char statbuf[1024], *sps;
+ ulong offset;
+ int i, j, rsize;
uchar *rptr;
uintptr addr;
- ulong offset;
- Confmem *cm;
- Proc *p;
- Segment *sg, *s;
+ Segment *s;
Ureg kur;
Waitq *wq;
+ Proc *p;
- a = va;
offset = off;
if(c->qid.type & QTDIR)
- return devdirread(c, a, n, 0, 0, procgen);
+ return devdirread(c, va, n, 0, 0, procgen);
if(QID(c->qid) == Qtrace){
+ int navail, ne;
+
if(!eventsavailable(nil))
return 0;
@@ -879,33 +915,6 @@
error(Eprocdied);
switch(QID(c->qid)){
- case Qargs:
- eqlock(&p->debug);
- j = procargs(p, statbuf, sizeof(statbuf));
- qunlock(&p->debug);
- if(offset >= j)
- return 0;
- if(offset+n > j)
- n = j-offset;
- statbufread:
- memmove(a, statbuf+offset, n);
- return n;
-
- case Qsyscall:
- eqlock(&p->debug);
- if(waserror()){
- qunlock(&p->debug);
- nexterror();
- }
- if(p->pid != PID(c->qid))
- error(Eprocdied);
- j = 0;
- if(p->syscalltrace != nil)
- j = readstr(offset, a, n, p->syscalltrace);
- qunlock(&p->debug);
- poperror();
- return j;
-
case Qmem:
addr = off2addr(off);
if(addr < KZERO)
@@ -918,21 +927,27 @@
if(addr < (uintptr)end) {
if(addr+n > (uintptr)end)
n = (uintptr)end - addr;
- memmove(a, (char*)addr, n);
+ memmove(va, (char*)addr, n);
return n;
}
for(i=0; i<nelem(conf.mem); i++){
- cm = &conf.mem[i];
+ Confmem *cm = &conf.mem[i];
/* klimit-1 because klimit might be zero! */
if(cm->kbase <= addr && addr <= cm->klimit-1){
if(addr+n >= cm->klimit-1)
n = cm->klimit - addr;
- memmove(a, (char*)addr, n);
+ memmove(va, (char*)addr, n);
return n;
}
}
error(Ebadarg);
+ case Qnoteid:
+ return readnum(offset, va, n, p->noteid, NUMSIZE);
+
+ case Qppid:
+ return readnum(offset, va, n, p->parentpid, NUMSIZE);
+
case Qprofile:
s = p->seg[TSEG];
if(s == nil || s->profile == nil)
@@ -943,42 +958,13 @@
return 0;
if(offset+n > i)
n = i - offset;
- memmove(a, ((char*)s->profile)+offset, n);
+ memmove(va, ((char*)s->profile)+offset, n);
return n;
- case Qnote:
- eqlock(&p->debug);
- if(waserror()){
- qunlock(&p->debug);
- nexterror();
- }
- if(p->pid != PID(c->qid))
- error(Eprocdied);
- if(n < 1) /* must accept at least the '\0' */
- error(Etoosmall);
- if(p->nnote == 0)
- n = 0;
- else {
- i = strlen(p->note[0].msg) + 1;
- if(i < n)
- n = i;
- memmove(a, p->note[0].msg, n-1);
- a[n-1] = '\0';
- if(--p->nnote == 0)
- p->notepending = 0;
- memmove(p->note, p->note+1, p->nnote*sizeof(Note));
- }
- poperror();
- qunlock(&p->debug);
- return n;
-
case Qproc:
- if(offset >= sizeof(Proc))
- return 0;
- if(offset+n > sizeof(Proc))
- n = sizeof(Proc) - offset;
- memmove(a, ((char*)p)+offset, n);
- return n;
+ rptr = (uchar*)p;
+ rsize = sizeof(Proc);
+ goto regread;
case Qregs:
rptr = (uchar*)p->dbgreg;
@@ -1004,53 +990,48 @@
return 0;
if(offset+n > rsize)
n = rsize - offset;
- memmove(a, rptr+offset, n);
+ memmove(va, rptr+offset, n);
return n;
case Qstatus:
- if(offset >= STATSIZE)
- return 0;
- if(offset+n > STATSIZE)
- n = STATSIZE - offset;
-
sps = p->psstate;
if(sps == nil)
sps = statename[p->state];
+ j = snprint(statbuf, sizeof(statbuf),
+ "%-27s %-27s %-11s "
+ "%11lud %11lud %11lud "
+ "%11lud %11lud %11lud "
+ "%11lud %11lud %11lud\n",
+ p->text, p->user, sps,
+ tk2ms(p->time[TUser]),
+ tk2ms(p->time[TSys]),
+ tk2ms(MACHP(0)->ticks - p->time[TReal]),
+ tk2ms(p->time[TCUser]),
+ tk2ms(p->time[TCSys]),
+ tk2ms(p->time[TCReal]),
+ (ulong)(procpagecount(p)*BY2PG/1024),
+ p->basepri, p->priority);
+ statbufread:
+ if(offset >= j)
+ return 0;
+ if(offset+n > j)
+ n = j - offset;
+ memmove(va, statbuf+offset, n);
+ return n;
- memset(statbuf, ' ', sizeof statbuf);
- readstr(0, statbuf+0*KNAMELEN, KNAMELEN-1, p->text);
- readstr(0, statbuf+1*KNAMELEN, KNAMELEN-1, p->user);
- readstr(0, statbuf+2*KNAMELEN, 11, sps);
-
- j = 2*KNAMELEN + 12;
- for(i = 0; i < 6; i++) {
- l = p->time[i];
- if(i == TReal)
- l = MACHP(0)->ticks - l;
- readnum(0, statbuf+j+NUMSIZE*i, NUMSIZE, tk2ms(l), NUMSIZE);
- }
-
- readnum(0, statbuf+j+NUMSIZE*6, NUMSIZE, procpagecount(p)*BY2PG/1024, NUMSIZE);
- readnum(0, statbuf+j+NUMSIZE*7, NUMSIZE, p->basepri, NUMSIZE);
- readnum(0, statbuf+j+NUMSIZE*8, NUMSIZE, p->priority, NUMSIZE);
- goto statbufread;
-
case Qsegment:
j = 0;
for(i = 0; i < NSEG; i++) {
- sg = p->seg[i];
- if(sg == nil)
+ s = p->seg[i];
+ if(s == nil)
continue;
- j += sprint(statbuf+j, "%-6s %c%c %8p %8p %4ld\n",
- sname[sg->type&SG_TYPE],
- sg->type&SG_FAULT ? 'F' : (sg->type&SG_RONLY ? 'R' : ' '),
- sg->profile ? 'P' : ' ',
- sg->base, sg->top, sg->ref);
+ j += snprint(statbuf+j, sizeof(statbuf)-j,
+ "%-6s %c%c %8p %8p %4ld\n",
+ sname[s->type&SG_TYPE],
+ s->type&SG_FAULT ? 'F' : (s->type&SG_RONLY ? 'R' : ' '),
+ s->profile ? 'P' : ' ',
+ s->base, s->top, s->ref);
}
- if(offset >= j)
- return 0;
- if(offset+n > j)
- n = j-offset;
goto statbufread;
case Qwait:
@@ -1089,18 +1070,21 @@
wq->w.time[TUser], wq->w.time[TSys], wq->w.time[TReal],
wq->w.msg);
free(wq);
- if(j < n)
- n = j;
offset = 0;
goto statbufread;
+ }
+ eqlock(&p->debug);
+ if(waserror()){
+ qunlock(&p->debug);
+ nexterror();
+ }
+ if(p->pid != PID(c->qid))
+ error(Eprocdied);
+
+ switch(QID(c->qid)){
case Qns:
case Qfd:
- eqlock(&p->debug);
- if(waserror()){
- qunlock(&p->debug);
- nexterror();
- }
if(offset == 0 || offset != c->mrock)
c->nrock = c->mrock = 0;
do {
@@ -1115,7 +1099,6 @@
i = c->mrock - offset;
qunlock(&p->debug);
poperror();
-
if(i <= 0 || i > j)
return 0;
if(i < n)
@@ -1122,55 +1105,75 @@
n = i;
offset = j - i;
goto statbufread;
-
- case Qnoteid:
- return readnum(offset, va, n, p->noteid, NUMSIZE);
-
- case Qppid:
- return readnum(offset, va, n, p->parentpid, NUMSIZE);
+ case Qargs:
+ j = procargs(p, statbuf, sizeof(statbuf));
+ qunlock(&p->debug);
+ poperror();
+ goto statbufread;
+
case Qwatchpt:
- eqlock(&p->debug);
j = readwatchpt(p, statbuf, sizeof(statbuf));
qunlock(&p->debug);
- if(offset >= j)
- return 0;
- if(offset+n > j)
- n = j - offset;
+ poperror();
goto statbufread;
+ case Qsyscall:
+ n = 0;
+ if(p->syscalltrace != nil)
+ n = readstr(offset, va, n, p->syscalltrace);
+ break;
+
+ case Qnote:
+ if(n < 1) /* must accept at least the '\0' */
+ error(Etoosmall);
+ if(p->nnote == 0)
+ n = 0;
+ else {
+ i = strlen(p->note[0].msg) + 1;
+ if(i < n)
+ n = i;
+ memmove(va, p->note[0].msg, n-1);
+ ((char*)va)[n-1] = '\0';
+ if(--p->nnote == 0)
+ p->notepending = 0;
+ memmove(p->note, p->note+1, p->nnote*sizeof(Note));
+ }
+ break;
+
+ default:
+ print("unknown qid in procwread\n");
+ error(Egreg);
}
- error(Egreg);
- return 0; /* not reached */
+
+ qunlock(&p->debug);
+ poperror();
+ return n;
}
static long
procwrite(Chan *c, void *va, long n, vlong off)
{
- int id, m;
- Proc *p, *t, *et;
- char *a, *arg, buf[ERRMAX];
+ char buf[ERRMAX], *arg;
ulong offset;
+ Proc *p;
+ int m;
- a = va;
offset = off;
if(c->qid.type & QTDIR)
error(Eisdir);
- /* Use the remembered noteid in the channel rather
- * than the process pgrpid
- */
+ /* use the remembered noteid in the channel qid */
if(QID(c->qid) == Qnotepg) {
if(n >= ERRMAX-1)
error(Etoobig);
memmove(buf, va, n);
buf[n] = 0;
- pgrpnote(NOTEID(c->pgrpid), buf, NUser);
+ postnotepg(NOTEID(c->qid), buf, NUser);
return n;
}
p = proctab(SLOT(c->qid));
-
eqlock(&p->debug);
if(waserror()){
qunlock(&p->debug);
@@ -1193,8 +1196,8 @@
if(arg[m-1] != 0)
arg[m++] = 0;
free(p->args);
- p->nargs = m;
p->args = arg;
+ p->nargs = m;
p->setargs = 1;
break;
@@ -1229,9 +1232,7 @@
break;
case Qnote:
- if(p->kp)
- error(Eperm);
- if(n >= ERRMAX-1)
+ if(n >= sizeof(buf))
error(Etoobig);
memmove(buf, va, n);
buf[n] = 0;
@@ -1238,34 +1239,19 @@
if(!postnote(p, 0, buf, NUser))
error("note not posted");
break;
+
case Qnoteid:
- if(p->kp)
- error(Eperm);
- id = atoi(a);
- if(id <= 0)
- error(Ebadarg);
- if(id == p->pid) {
- p->noteid = id;
- break;
- }
- t = proctab(0);
- for(et = t+conf.nproc; t < et; t++) {
- if(t->state == Dead || t->kp)
- continue;
- if(id == t->noteid) {
- nonone(t);
- if(strcmp(p->user, t->user) != 0)
- error(Eperm);
- p->noteid = id;
- break;
- }
- }
- if(p->noteid != id)
- error(Ebadarg);
+ if(n >= sizeof(buf))
+ error(Etoobig);
+ memmove(buf, va, n);
+ buf[n] = 0;
+ changenoteid(p, atoi(buf));
break;
+
case Qwatchpt:
writewatchpt(p, va, n, off);
break;
+
default:
print("unknown qid in procwrite\n");
error(Egreg);
@@ -1352,7 +1338,8 @@
if(waserror()) {
up->psstate = state;
qlock(&p->debug);
- p->pdbg = nil;
+ if(p->pdbg == up)
+ p->pdbg = nil;
nexterror();
}
sleep(&up->sleep, procstopped, p);
@@ -1439,9 +1426,6 @@
vlong time;
char *e;
void (*pt)(Proc*, int, vlong);
-
- if(p->kp) /* no ctl requests to kprocs */
- error(Eperm);
cb = parsecmd(va, n);
if(waserror()){
--- a/sys/src/9/port/pgrp.c
+++ b/sys/src/9/port/pgrp.c
@@ -12,24 +12,6 @@
static Ref pgrpid;
static Ref mountid;
-void
-pgrpnote(ulong noteid, char *n, int flag)
-{
- Proc *p, *ep;
-
- p = proctab(0);
- for(ep = p+conf.nproc; p < ep; p++) {
- if(p->state == Dead)
- continue;
- if(up != p && p->noteid == noteid && p->kp == 0) {
- qlock(&p->debug);
- if(p->noteid == noteid)
- postnote(p, 0, n, flag);
- qunlock(&p->debug);
- }
- }
-}
-
Pgrp*
newpgrp(void)
{
--- a/sys/src/9/port/portdat.h
+++ b/sys/src/9/port/portdat.h
@@ -187,7 +187,6 @@
Mnt* mux; /* Mnt for clients using me for messages */
union {
void* aux;
- Qid pgrpid; /* for #p/notepg */
ulong mid; /* for ns in devproc */
};
Chan* mchan; /* channel to mounted server */
@@ -649,21 +648,22 @@
Mach *mach; /* machine running this proc */
char *text;
char *user;
+
char *args;
int nargs; /* number of bytes of args */
int setargs; /* process changed its args */
+
Proc *rnext; /* next process in run queue */
Proc *qnext; /* next process on queue for a QLock */
- QLock *qlock; /* addr of qlock being queued for DEBUG */
- int state;
+
char *psstate; /* What /proc/#/status reports */
- Segment *seg[NSEG];
- QLock seglock; /* locked whenever seg[] changes */
+ int state;
+
ulong pid;
ulong noteid; /* Equivalent of note group */
ulong parentpid;
- Proc *pidhash; /* next proc in pid hash */
+ Proc *parent; /* Process to send wait record on exit */
Lock exl; /* Lock count and waitq */
Waitq *waitq; /* Exited processes wait children */
int nchild; /* Number of living children */
@@ -670,7 +670,9 @@
int nwait; /* Number of uncollected wait records */
QLock qwaitr;
Rendez waitr; /* Place to hang out in wait */
- Proc *parent;
+
+ QLock seglock; /* locked whenever seg[] changes */
+ Segment *seg[NSEG];
Pgrp *pgrp; /* Process group for namespace */
Egrp *egrp; /* Environment group */
--- a/sys/src/9/port/portfns.h
+++ b/sys/src/9/port/portfns.h
@@ -215,8 +215,7 @@
ulong perfticks(void);
void pexit(char*, int);
void pgrpcpy(Pgrp*, Pgrp*);
-void pgrpnote(ulong, char*, int);
-int pidalloc(Proc*);
+ulong pidalloc(Proc*);
#define poperror() up->nerrlab--
void portcountpagerefs(ulong*, int);
int postnote(Proc*, int, char*, int);
@@ -226,7 +225,6 @@
void printinit(void);
ulong procalarm(ulong);
void procctl(void);
-void procdump(void);
int procfdprint(Chan*, int, char*, int);
void procflushseg(Segment*);
void procflushpseg(Physseg*);
@@ -235,6 +233,7 @@
void procinit0(void);
ulong procpagecount(Proc*);
void procpriority(Proc*, int, int);
+void procsetuser(Proc*, char*);
Proc* proctab(int);
extern void (*proctrace)(Proc*, int, vlong);
void procwired(Proc*, int);
@@ -304,7 +303,6 @@
Proc* runproc(void);
void savefpregs(FPsave*);
void sched(void);
-void scheddump(void);
void schedinit(void);
void (*screenputs)(char*, int);
void* secalloc(ulong);
@@ -318,6 +316,7 @@
void setkernur(Ureg*, Proc*);
int setlabel(Label*);
void setmalloctag(void*, uintptr);
+ulong setnoteid(Proc*, ulong);
void setrealloctag(void*, uintptr);
void setregisters(Ureg*, char*, char*, int);
void setupwatchpts(Proc*, Watchpt*, int);
--- a/sys/src/9/port/proc.c
+++ b/sys/src/9/port/proc.c
@@ -21,7 +21,6 @@
static struct Procalloc
{
Lock;
- Proc* ht[128];
Proc* arena;
Proc* free;
} procalloc;
@@ -53,8 +52,9 @@
"Waitrelease",
};
-static void pidfree(Proc*);
static void rebalance(void);
+static void pidinit(void);
+static void pidfree(Proc*);
/*
* Always splhi()'ed.
@@ -606,31 +606,14 @@
lock(&procalloc);
}
procalloc.free = p->qnext;
+ p->qnext = nil;
unlock(&procalloc);
- assert(p->state == Dead);
p->psstate = "New";
- p->mach = nil;
- p->eql = nil;
- p->qnext = nil;
- p->nchild = 0;
- p->nwait = 0;
- p->waitq = nil;
- p->parent = nil;
- p->pgrp = nil;
- p->egrp = nil;
- p->fgrp = nil;
- p->rgrp = nil;
- p->pdbg = nil;
p->fpstate = FPinit;
- p->kp = 0;
p->procctl = 0;
- p->syscalltrace = nil;
- p->notepending = 0;
p->ureg = nil;
p->dbgreg = nil;
- p->privatemem = 0;
- p->noswap = 0;
p->nerrlab = 0;
p->errstr = p->errbuf0;
p->syserrstr = p->errbuf1;
@@ -639,11 +622,6 @@
p->nlocks = 0;
p->delaysched = 0;
p->trace = 0;
- p->nargs = 0;
- p->setargs = 0;
- memset(p->seg, 0, sizeof p->seg);
- p->parentpid = 0;
- p->noteid = 0;
if(p->kstack == nil)
p->kstack = smalloc(KSTACK);
@@ -673,8 +651,8 @@
/* pick a machine to wire to */
memset(nwired, 0, sizeof(nwired));
p->wired = nil;
- pp = proctab(0);
- for(i=0; i<conf.nproc; i++, pp++){
+ for(i=0; i<conf.nproc; i++){
+ pp = proctab(i);
wm = pp->wired;
if(wm != nil && pp->pid)
nwired[wm->machno]++;
@@ -724,6 +702,8 @@
for(i=0; i<conf.nproc-1; i++, p++)
p->qnext = p+1;
p->qnext = nil;
+
+ pidinit();
}
/*
@@ -1082,7 +1062,7 @@
pexit(char *exitstr, int freemem)
{
Proc *p;
- Segment **s, **es;
+ Segment **s;
ulong utime, stime;
Waitq *wq;
Fgrp *fgrp;
@@ -1124,18 +1104,14 @@
if(pgrp != nil)
closepgrp(pgrp);
- /*
- * if not a kernel process and have a parent,
- * do some housekeeping.
- */
- if(up->kp)
- goto Nowait;
+ if(up->parentpid == 0){
+ if(exitstr == nil)
+ exitstr = "unknown";
+ panic("boot process died: %s", exitstr);
+ }
p = up->parent;
- if(p != nil){
- if(p->pid != up->parentpid || p->state == Broken)
- goto Nowait;
-
+ if(p != nil && p->pid == up->parentpid && p->state != Broken){
wq = smalloc(sizeof(Waitq));
wq->w.pid = up->pid;
utime = up->time[TUser] + up->time[TCUser];
@@ -1175,29 +1151,16 @@
if(wq != nil)
free(wq);
}
- else if(up->parentpid == 0){
- if(exitstr == nil)
- exitstr = "unknown";
- panic("boot process died: %s", exitstr);
- }
-Nowait:
if(!freemem)
addbroken(up);
- qlock(&up->seglock);
- es = &up->seg[NSEG];
- for(s = up->seg; s < es; s++) {
- if(*s != nil) {
- putseg(*s);
- *s = nil;
- }
- }
- qunlock(&up->seglock);
+ qlock(&up->debug);
lock(&up->exl); /* Prevent my children from leaving waits */
pidfree(up);
- up->pid = 0;
+ up->parent = nil;
+ up->nchild = up->nwait = 0;
wakeup(&up->waitr);
unlock(&up->exl);
@@ -1207,7 +1170,6 @@
}
/* release debuggers */
- qlock(&up->debug);
if(up->pdbg != nil) {
wakeup(&up->pdbg->sleep);
up->pdbg = nil;
@@ -1223,6 +1185,15 @@
up->nwatchpt = 0;
qunlock(&up->debug);
+ qlock(&up->seglock);
+ for(s = up->seg; s < &up->seg[NSEG]; s++) {
+ if(*s != nil) {
+ putseg(*s);
+ *s = nil;
+ }
+ }
+ qunlock(&up->seglock);
+
/* Sched must not loop for these locks */
lock(&procalloc);
lock(&palloc);
@@ -1281,7 +1252,8 @@
Proc*
proctab(int i)
{
- return &procalloc.arena[i];
+#define proctab(x) (&procalloc.arena[(x)])
+ return proctab(i);
}
void
@@ -1306,23 +1278,6 @@
p->lastlock ? p->lastlock->pc : 0, p->priority);
}
-void
-procdump(void)
-{
- int i;
- Proc *p;
-
- if(up != nil)
- print("up %lud\n", up->pid);
- else
- print("no current process\n");
- for(i=0; i<conf.nproc; i++) {
- p = &procalloc.arena[i];
- if(p->state != Dead)
- dumpaproc(p);
- }
-}
-
/*
* wait till all matching processes have flushed their mmu
*/
@@ -1339,7 +1294,7 @@
memset(await, 0, conf.nmach*sizeof(await[0]));
nwait = 0;
for(i = 0; i < conf.nproc; i++){
- p = &procalloc.arena[i];
+ p = proctab(i);
if(p->state != Dead && (*match)(p, a)){
p->newtlb = 1;
for(nm = 0; nm < conf.nmach; nm++){
@@ -1422,24 +1377,6 @@
}
void
-scheddump(void)
-{
- Proc *p;
- Schedq *rq;
-
- for(rq = &runq[Nrq-1]; rq >= runq; rq--){
- if(rq->head == nil)
- continue;
- print("rq%zd:", rq-runq);
- for(p = rq->head; p != nil; p = p->rnext)
- print(" %lud(%lud)", p->pid, m->ticks - p->readytime);
- print("\n");
- delay(150);
- }
- print("nrdy %d\n", nrdy);
-}
-
-void
kproc(char *name, void (*func)(void *), void *arg)
{
static Pgrp *kpgrp;
@@ -1447,6 +1384,7 @@
p = newproc();
+ qlock(&p->debug);
if(up != nil){
p->slash = up->slash;
p->dot = up->slash; /* unlike fork, do not inherit the dot for kprocs */
@@ -1460,9 +1398,12 @@
p->nnote = 0;
p->notify = nil;
p->notified = 0;
+ p->notepending = 0;
p->procmode = 0640;
+ p->privatemem = 1;
p->noswap = 1;
+ p->hang = 0;
p->kp = 1;
kprocchild(p, func, arg);
@@ -1470,6 +1411,8 @@
kstrdup(&p->text, name);
kstrdup(&p->user, eve);
kstrdup(&p->args, "");
+ p->nargs = 0;
+ p->setargs = 0;
if(kpgrp == nil)
kpgrp = newpgrp();
@@ -1480,8 +1423,10 @@
memset(p->time, 0, sizeof(p->time));
p->time[TReal] = MACHP(0)->ticks;
- p->noteid = pidalloc(p);
+ pidalloc(p);
+ qunlock(&p->debug);
+
procpriority(p, PriKproc, 0);
p->psstate = nil;
@@ -1597,12 +1542,12 @@
int i;
Segment *s;
ulong l, max;
- Proc *p, *ep, *kp;
+ Proc *p, *kp;
max = 0;
kp = nil;
- ep = procalloc.arena+conf.nproc;
- for(p = procalloc.arena; p < ep; p++) {
+ for(i = 0; i < conf.nproc; i++) {
+ p = proctab(i);
if(p->state == Dead || p->kp)
continue;
if((p->noswap || (p->procmode & 0222) == 0) && strcmp(eve, p->user) == 0)
@@ -1617,7 +1562,8 @@
return;
print("%lud: %s killed: %s\n", kp->pid, kp->text, why);
qlock(&kp->seglock);
- for(p = procalloc.arena; p < ep; p++) {
+ for(i = 0; i < conf.nproc; i++) {
+ p = proctab(i);
if(p->state == Dead || p->kp)
continue;
if(p != kp && p->seg[BSEG] != nil && p->seg[BSEG] == kp->seg[BSEG])
@@ -1649,14 +1595,27 @@
void
renameuser(char *old, char *new)
{
- Proc *p, *ep;
+ Proc *p;
+ int i;
- ep = procalloc.arena+conf.nproc;
- for(p = procalloc.arena; p < ep; p++)
+ for(i = 0; i < conf.nproc; i++){
+ p = proctab(i);
+ qlock(&p->debug);
if(p->user != nil && strcmp(old, p->user) == 0)
kstrdup(&p->user, new);
+ qunlock(&p->debug);
+ }
}
+void
+procsetuser(Proc *p, char *new)
+{
+ qlock(&p->debug);
+ kstrdup(&p->user, new);
+ p->basepri = PriNormal;
+ qunlock(&p->debug);
+}
+
/*
* time accounting called by clock() splhi'd
*/
@@ -1707,65 +1666,204 @@
m->load = load/100;
}
-int
-pidalloc(Proc *p)
+/*
+ * A Pid structure is a reference counted hashtable entry
+ * with "pid" being the key and "procindex" being the value.
+ * A entry is allocated atomically by changing the key from
+ * negative or zero to the positive process id number.
+ * Pid's outlive ther Proc's as long as other processes hold
+ * a reference to them such as noteid or parentpid.
+ * This prevents pid reuse when the pid generator wraps.
+ */
+typedef struct Pid Pid;
+struct Pid
{
- static int gen, wrapped;
- int pid, h;
- Proc *x;
+ Ref;
+ long pid;
+ int procindex;
+};
- lock(&procalloc);
-Retry:
- pid = ++gen & 0x7FFFFFFF;
- if(pid == 0){
- wrapped = 1;
- goto Retry;
+enum {
+ PIDMASK = 0x7FFFFFFF,
+ PIDSHIFT = 4, /* log2 bucket size of the hash table */
+};
+
+static Pid *pidhashtab;
+static ulong pidhashmask;
+
+static void
+pidinit(void)
+{
+ /*
+ * allocate 3 times conf.nproc Pid structures for the hash table
+ * and round up to a power of two as each process can reference
+ * up to 3 unique Pid structures:
+ * - pid
+ * - noteid
+ * - parentpid
+ */
+ pidhashmask = 1<<PIDSHIFT;
+ while(pidhashmask < conf.nproc*3)
+ pidhashmask <<= 1;
+
+ pidhashtab = xalloc(pidhashmask * sizeof(pidhashtab[0]));
+ if(pidhashtab == nil){
+ xsummary();
+ panic("cannot allocate pid hashtable of size %lud", pidhashmask);
}
- h = pid % nelem(procalloc.ht);
- if(wrapped)
- for(x = procalloc.ht[h]; x != nil; x = x->pidhash)
- if(x->pid == pid)
- goto Retry;
- if(p != nil){
- p->pid = pid;
- p->pidhash = procalloc.ht[h];
- procalloc.ht[h] = p;
+
+ /* make it a mask */
+ pidhashmask--;
+}
+
+static Pid*
+pidlookup(long pid)
+{
+ Pid *i, *e;
+ long o;
+
+ i = &pidhashtab[(pid<<PIDSHIFT) & pidhashmask];
+ for(e = &i[1<<PIDSHIFT]; i < e; i++){
+ o = i->pid;
+ if(o == pid)
+ return i;
+ if(o == 0)
+ break;
}
- unlock(&procalloc);
- return pid;
+ return nil;
}
-static void
-pidfree(Proc *p)
+/*
+ * increment the reference count of a pid (pid>0)
+ * or allocate a new one (pid<=0)
+ */
+static Pid*
+pidadd(long pid)
{
- int h;
- Proc **l;
+ Pid *i, *e;
+ long o;
- h = p->pid % nelem(procalloc.ht);
- lock(&procalloc);
- for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash)
- if(*l == p){
- *l = p->pidhash;
- break;
+ if(pid > 0){
+ i = pidlookup(pid);
+ if(i != nil)
+ incref(i);
+ return i;
+ }
+Again:
+ do {
+ static Ref gen;
+ pid = incref(&gen) & PIDMASK;
+ } while(pid == 0 || pidlookup(pid) != nil);
+
+ i = &pidhashtab[(pid<<PIDSHIFT) & pidhashmask];
+ for(e = &i[1<<PIDSHIFT]; i < e; i++){
+ while((o = i->pid) <= 0){
+ if(cmpswap(&i->pid, o, pid)){
+ incref(i);
+ return i;
+ }
}
- unlock(&procalloc);
+ }
+ /* bucket full, try a different pid */
+ goto Again;
}
+/*
+ * decrement reference count of a pid and free it
+ * when no references are remaining
+ */
+static void
+piddel(Pid *i)
+{
+ if(decref(i))
+ return;
+ i->pid = -1; /* freed */
+}
+
int
procindex(ulong pid)
{
- Proc *p;
- int h;
- int s;
+ Pid *i;
- s = -1;
- h = pid % nelem(procalloc.ht);
- lock(&procalloc);
- for(p = procalloc.ht[h]; p != nil; p = p->pidhash)
- if(p->pid == pid){
- s = p - procalloc.arena;
- break;
- }
- unlock(&procalloc);
- return s;
+ i = pidlookup(pid);
+ if(i != nil){
+ int x = i->procindex;
+ if(proctab(x)->pid == pid)
+ return x;
+ }
+ return -1;
+}
+
+ulong
+setnoteid(Proc *p, ulong noteid)
+{
+ Pid *i, *o;
+
+ /*
+ * avoid allocating a new pid when we are the only
+ * user of the noteid
+ */
+ o = pidlookup(p->noteid);
+ if(noteid == 0 && o->ref == 1)
+ return o->pid;
+
+ i = pidadd(noteid);
+ if(i == nil)
+ error(Ebadarg);
+ piddel(o);
+ return p->noteid = i->pid;
+}
+
+static ulong
+setparentpid(Proc *p, Proc *pp)
+{
+ Pid *i;
+
+ i = pidadd(pp->pid);
+ return p->parentpid = i->pid;
+}
+
+/*
+ * allocate pid, noteid and parentpid to a process
+ */
+ulong
+pidalloc(Proc *p)
+{
+ Pid *i;
+
+ /* skip for the boot process */
+ if(up != nil)
+ setparentpid(p, up);
+
+ i = pidadd(0);
+ i->procindex = (int)(p - procalloc.arena);
+
+ if(p->noteid == 0){
+ incref(i);
+ p->noteid = i->pid;
+ } else
+ pidadd(p->noteid);
+
+ return p->pid = i->pid;
+}
+
+/*
+ * release pid, noteid and parentpid from a process
+ */
+static void
+pidfree(Proc *p)
+{
+ Pid *i;
+
+ i = pidlookup(p->pid);
+ piddel(i);
+
+ if(p->noteid != p->pid)
+ i = pidlookup(p->noteid);
+ piddel(i);
+
+ if(p->parentpid != 0)
+ piddel(pidlookup(p->parentpid));
+
+ p->pid = p->noteid = p->parentpid = 0;
}
--- a/sys/src/9/port/sysproc.c
+++ b/sys/src/9/port/sysproc.c
@@ -80,12 +80,14 @@
closeegrp(oeg);
}
if(flag & RFNOTEG)
- up->noteid = pidalloc(nil);
+ setnoteid(up, 0);
return 0;
}
p = newproc();
+ qlock(&p->debug);
+
p->scallnr = up->scallnr;
p->s = up->s;
p->slash = up->slash;
@@ -96,10 +98,11 @@
p->nnote = up->nnote;
p->notify = up->notify;
p->notified = 0;
+ p->notepending = 0;
p->lastnote = up->lastnote;
+ if((flag & RFNOTEG) == 0)
+ p->noteid = up->noteid;
- p->noteid = up->noteid;
- p->parentpid = up->pid;
p->procmode = up->procmode;
p->privatemem = up->privatemem;
p->noswap = up->noswap;
@@ -106,6 +109,7 @@
p->hang = up->hang;
if(up->procctl == Proc_tracesyscall)
p->procctl = Proc_tracesyscall;
+ p->kp = 0;
/* Craft a return frame which will cause the child to pop out of
* the scheduler in user mode with the return register zero
@@ -115,6 +119,8 @@
kstrdup(&p->text, up->text);
kstrdup(&p->user, up->user);
kstrdup(&p->args, "");
+ p->nargs = 0;
+ p->setargs = 0;
p->insyscall = 0;
memset(p->time, 0, sizeof(p->time));
@@ -122,6 +128,8 @@
pid = pidalloc(p);
+ qunlock(&p->debug);
+
/* Abort the child process on error */
if(waserror()){
p->kp = 1;
@@ -188,9 +196,6 @@
p->egrp = up->egrp;
incref(p->egrp);
}
-
- if(flag & RFNOTEG)
- p->noteid = pid;
procfork(p);
--- a/sys/src/9/port/userinit.c
+++ b/sys/src/9/port/userinit.c
@@ -61,6 +61,7 @@
*/
up->kp = 0;
up->noswap = 0;
+ up->privatemem = 0;
procpriority(up, PriNormal, 0);
procsetup(up);