ref: 69e9efa847c03924caef9db2a0a4db586ddcc2f9
parent: 6fbf81a98f8f747444cfe0e7a0591f6de511e510
author: cinap_lenrek <[email protected]>
date: Sun Oct 15 17:20:30 EDT 2023
ndb/dns: avoid blocking 9p on refresh, use shorter 15second query timeout, cleanup When writing "refresh" control message to trigger garbage collection and there are other (query) processes active, the 9p loop would block until these processes complete. Instead, defer the garbage collection operation to these outstanding processes once any of them completes, keeping the 9p loop un-contended for longer. When theres no other processes, we can garbage collect ourselfs immediately (just as before). Only acquire the dnvars lock on Twrite, so we can handle other 9p requests while garbage collection is going on. Also reduce the 60 second timeout to 15 second for queries. Other dns servers usually use something between 10 and 30 seconds. Avoid excessive nsec() and time() calls by having timems() function that only uses nsec() and then updates global nowms (millisecons) and now (seconds) variable. After the end of readnet(), timems() needs to be called anyway to update the query statistics so we can use that as the new "now".
--- a/sys/src/cmd/ndb/dn.c
+++ b/sys/src/cmd/ndb/dn.c
@@ -29,8 +29,8 @@
Lock;
ulong names; /* names allocated */
ulong oldest; /* longest we'll leave a name around */
- int active;
- int mutex;
+ int active; /* number of active processes */
+ int mutex; /* 0 or pid of process doing garbage collection */
ushort id; /* same size as in packet */
} dnvars;
@@ -149,6 +149,15 @@
noted(NDFLT); /* die */
}
+uvlong
+timems(void)
+{
+ uvlong ms = nsec()/1000000L;
+ now = ms/1000L;
+ nowms = ms;
+ return ms;
+}
+
void
dninit(void)
{
@@ -164,6 +173,8 @@
dnvars.names = 0;
dnvars.id = truerand(); /* don't start with same id every time */
+ timems();
+
notify(ding);
}
@@ -665,11 +676,12 @@
if(traceactivity)
dnslog("get: %d active by pid %d from %p",
dnvars.active, getpid(), getcallerpc(&req));
- lock(&dnvars);
+
/*
* can't block here if we're already holding one
* of the dnvars.active (recursive). will deadlock.
*/
+ lock(&dnvars);
while(!recursive && dnvars.mutex){
unlock(&dnvars);
sleep(100); /* tune; was 200 */
@@ -676,8 +688,6 @@
lock(&dnvars);
}
rv = ++dnvars.active;
- now = time(nil);
- nowns = nsec();
req->id = ++dnvars.id;
req->aux = nil;
unlock(&dnvars);
@@ -687,9 +697,11 @@
void
putactivity(int recursive)
{
+ int pid = getpid();
+
if(traceactivity)
- dnslog("put: %d active by pid %d",
- dnvars.active, getpid());
+ dnslog("put: %d active by pid %d", dnvars.active, pid);
+
lock(&dnvars);
dnvars.active--;
assert(dnvars.active >= 0); /* "dnvars.active %d", dnvars.active */
@@ -698,15 +710,16 @@
* clean out old entries and check for new db periodicly
* can't block here if being called to let go a "recursive" lock
* or we'll deadlock waiting for ourselves to give up the dnvars.active.
+ * also don't block if we are the 9p process (needrefresh == pid).
*/
- if (recursive || dnvars.mutex ||
- (needrefresh == 0 && dnvars.active > 0)){
+ if(recursive || dnvars.mutex
+ || dnvars.active > 0 && (needrefresh == 0 || needrefresh == pid)){
unlock(&dnvars);
return;
}
/* wait till we're alone */
- dnvars.mutex = 1;
+ dnvars.mutex = pid;
while(dnvars.active > 0){
unlock(&dnvars);
sleep(100); /* tune; was 100 */
@@ -714,13 +727,14 @@
}
unlock(&dnvars);
- db2cache(needrefresh);
-
+ db2cache(needrefresh != 0);
dnageall(0);
/* let others back in */
+ lock(&dnvars);
needrefresh = 0;
dnvars.mutex = 0;
+ unlock(&dnvars);
}
int
--- a/sys/src/cmd/ndb/dnresolve.c
+++ b/sys/src/cmd/ndb/dnresolve.c
@@ -235,7 +235,7 @@
* thus we don't wait very long for them.
*/
static void
-notestats(vlong start, int tmout, int type)
+notestats(long ms, int tmout, int type)
{
qlock(&stats);
if (tmout) {
@@ -245,7 +245,7 @@
else if (type == Tcname)
stats.tmoutcname++;
} else {
- long wait10ths = NS2MS(nsec() - start) / 100;
+ long wait10ths = ms / 100;
if (wait10ths <= 0)
stats.under10ths[0]++;
@@ -559,31 +559,31 @@
/* timed read of reply. sets srcip. ibuf must be 64K to handle tcp answers. */
static int
-readnet(Query *qp, int medium, uchar *ibuf, uvlong endms, uchar **replyp,
- uchar *srcip)
+readnet(Query *qp, int medium, uchar *ibuf, uvlong endms, uchar **replyp, uchar *srcip)
{
int len, fd;
long ms;
- vlong startns = nsec();
+ uvlong startms;
uchar *reply;
uchar lenbuf[2];
- len = -1; /* pessimism */
*replyp = nil;
memset(srcip, 0, IPaddrlen);
- ms = endms - NS2MS(startns);
- if (ms <= 0)
+
+ startms = nowms;
+ ms = (long)(endms - startms);
+ if (ms < 1)
return -1; /* taking too long */
+ len = -1; /* pessimism */
reply = ibuf;
- alarm(ms);
if (medium == Udp)
if (qp->udpfd < 0)
dnslog("readnet: qp->udpfd closed");
else {
+ alarm(ms);
len = read(qp->udpfd, ibuf, Udphdrsize+Maxudpin);
alarm(0);
- notestats(startns, len < 0, qp->type);
if (len >= IPaddrlen)
memmove(srcip, ibuf, IPaddrlen);
if (len >= Udphdrsize) {
@@ -598,24 +598,28 @@
if (fd < 0)
dnslog("readnet: %s: tcp fd unset for dest %I",
qp->dp->name, qp->tcpip);
- else if (readn(fd, lenbuf, 2) != 2) {
- dnslog("readnet: short read of 2-byte tcp msg size from %I",
- qp->tcpip);
- /* probably a time-out */
- notestats(startns, 1, qp->type);
- } else {
- len = lenbuf[0]<<8 | lenbuf[1];
- if (readn(fd, ibuf, len) != len) {
- dnslog("readnet: short read of tcp data from %I",
- qp->tcpip);
- /* probably a time-out */
- notestats(startns, 1, qp->type);
- len = -1;
+ else {
+ alarm(ms);
+ if (readn(fd, lenbuf, 2) != 2) {
+ alarm(0);
+ dnslog("readnet: short read of 2-byte tcp msg size from %I", qp->tcpip);
+ } else {
+ len = lenbuf[0]<<8 | lenbuf[1];
+ if (readn(fd, ibuf, len) != len) {
+ alarm(0);
+ dnslog("readnet: short read of tcp data from %I", qp->tcpip);
+ len = -1;
+ } else {
+ alarm(0);
+ }
}
}
memmove(srcip, qp->tcpip, IPaddrlen);
}
- alarm(0);
+
+ ms = (long)(timems() - startms);
+ notestats(ms, len < 0, qp->type);
+
*replyp = reply;
return len;
}
@@ -631,14 +635,14 @@
{
int len;
char *err;
- char tbuf[32];
uchar *reply;
uchar srcip[IPaddrlen];
RR *rp;
- for (; timems() < endms &&
- (len = readnet(qp, medium, ibuf, endms, &reply, srcip)) >= 0;
- freeanswers(mp)){
+ for (;; freeanswers(mp)) {
+ len = readnet(qp, medium, ibuf, endms, &reply, srcip);
+ if (len < 0)
+ break;
/* convert into internal format */
memset(mp, 0, sizeof *mp);
err = convM2DNS(reply, len, mp, nil);
@@ -675,20 +679,6 @@
return 0;
}
}
- if (timems() >= endms) {
- ; /* query expired */
- } else if (0) {
- /* this happens routinely when a read times out */
- dnslog("readreply: %s type %s: ns %I read error or eof "
- "(returned %d): %r", qp->dp->name, rrname(qp->type,
- tbuf, sizeof tbuf), srcip, len);
- if (medium == Udp)
- for (rp = qp->nsrp; rp != nil; rp = rp->next)
- if (rp->type == Tns)
- dnslog("readreply: %s: query sent to "
- "ns %s", qp->dp->name,
- rp->host->name);
- }
memset(mp, 0, sizeof *mp);
return -1;
}
@@ -985,9 +975,6 @@
char buf[32];
Dest *p;
- if(timems() >= qp->req->aborttime)
- return -1;
-
/*
* if we send tcp query, we just take the dest ip address from
* the udp header placed there by tcpquery().
@@ -1272,15 +1259,10 @@
*/
static int
tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len,
- ulong waitms, int inns, ushort req)
+ uvlong endms, int inns, ushort req)
{
int rv = 0;
- uvlong endms;
- endms = timems() + waitms;
- if(endms > qp->req->aborttime)
- endms = qp->req->aborttime;
-
if (0)
dnslog("%s: udp reply truncated; retrying query via tcp to %I",
qp->dp->name, qp->tcpip);
@@ -1338,10 +1320,13 @@
for(ndest = 2; ndest < Maxdest; ndest += 2){
qp->ndest = ndest;
qp->tcpset = 0;
+
+ endms = nowms;
+ if(endms >= qp->req->aborttime)
+ break;
if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0)
break;
-
- endms = timems() + waitms;
+ endms += waitms;
if(endms > qp->req->aborttime)
endms = qp->req->aborttime;
@@ -1361,8 +1346,10 @@
} else {
/* whoops, it was truncated! ask again via tcp */
freeanswers(&m);
+ if(nowms >= endms)
+ break;
rv = tcpquery(qp, &m, depth, ibuf, obuf, len,
- waitms, inns, req); /* answer in m */
+ endms, inns, req); /* answer in m */
if (rv < 0) {
freeanswers(&m);
break; /* failed via tcp too */
@@ -1516,8 +1503,8 @@
Query q;
memset(&req, 0, sizeof req);
- req.isslave = 1;
req.aborttime = timems() + Maxreqtm;
+ req.isslave = 1;
req.from = "internal";
queryinit(&q, dnlookup(root, Cin, 1), Tns, &req);
nsrp = randomize(dblookup(root, Cin, Tns, 0, 0));
--- a/sys/src/cmd/ndb/dns.c
+++ b/sys/src/cmd/ndb/dns.c
@@ -64,7 +64,7 @@
int mfd[2];
int needrefresh;
ulong now;
-vlong nowns;
+uvlong nowms;
int sendnotifies;
char *trace;
int traceactivity;
@@ -186,8 +186,6 @@
(cfg.resolver? "resolver ": ""), mntpt);
opendatabase();
- now = time(nil); /* open time files before we fork */
- nowns = nsec();
dnsuser = estrdup(getuser());
snprint(servefile, sizeof servefile, "/srv/dns%s", ext);
@@ -421,10 +419,6 @@
if(debug)
dnslog("%F", &job->request);
- getactivity(&req, 0);
- req.aborttime = timems() + Maxreqtm;
- req.from = "9p";
-
switch(job->request.type){
default:
warning("unknown request type %d", job->request.type);
@@ -454,9 +448,17 @@
rread(job, mf);
break;
case Twrite:
- /* &req is handed to dnresolve() */
+ getactivity(&req, 0);
+ req.aborttime = timems() + Maxreqtm;
+ req.from = "9p";
rwrite(job, mf, &req);
- break;
+ freejob(job);
+ if(req.isslave){
+ putactivity(0);
+ _exits(0);
+ }
+ putactivity(0);
+ continue;
case Tclunk:
rclunk(job, mf);
break;
@@ -470,18 +472,7 @@
rwstat(job, mf);
break;
}
-
freejob(job);
-
- /*
- * slave processes die after replying
- */
- if(req.isslave){
- putactivity(0);
- _exits(0);
- }
-
- putactivity(0);
}
}
@@ -702,7 +693,7 @@
else if(strcmp(job->request.data, "dump")==0)
dndump("/lib/ndb/dnsdump");
else if(strcmp(job->request.data, "refresh")==0)
- needrefresh = 1;
+ needrefresh = getpid();
else if(strcmp(job->request.data, "stats")==0)
dnstats("/lib/ndb/dnsstats");
else if(strncmp(job->request.data, "target ", 7)==0){
--- a/sys/src/cmd/ndb/dns.h
+++ b/sys/src/cmd/ndb/dns.h
@@ -1,8 +1,3 @@
-#define NS2MS(ns) ((ns) / 1000000L)
-#define S2MS(s) ((s) * 1000LL)
-
-#define timems() NS2MS(nsec())
-
typedef struct Ndbtuple Ndbtuple;
enum
@@ -152,7 +147,7 @@
Maxactive= 250,
/* tune; was 8*1000; that was too short */
- Maxreqtm= 60*1000, /* max. ms to process a request */
+ Maxreqtm= 15*1000, /* max. ms to process a request */
Notauthoritative = 0,
Authoritative,
@@ -424,10 +419,10 @@
extern char *logfile;
extern int maxage; /* age of oldest entry in cache (secs) */
extern char mntpt[];
-extern int needrefresh;
+extern int needrefresh; /* set to pid of the process requesting flush */
extern int norecursion;
extern ulong now; /* time base */
-extern vlong nowns;
+extern uvlong nowms;
extern Area *owned;
extern int sendnotifies;
extern ulong target;
@@ -491,6 +486,7 @@
int rrtype(char*);
void slave(Request*);
int subsume(char*, char*);
+uvlong timems(void);
int tsame(int, int);
void unique(RR*);
void warning(char*, ...);
--- a/sys/src/cmd/ndb/dnsdebug.c
+++ b/sys/src/cmd/ndb/dnsdebug.c
@@ -22,7 +22,7 @@
char mntpt[Maxpath];
int needrefresh;
ulong now;
-vlong nowns;
+uvlong nowms;
char *trace;
int traceactivity;
char *zonerefreshprogram;
@@ -76,8 +76,6 @@
usage();
}ARGEND
- now = time(nil);
- nowns = nsec();
dninit();
fmtinstall('P', prettyrrfmt);
opendatabase();
@@ -258,7 +256,7 @@
}
memset(&req, 0, sizeof req);
req.isslave = 1;
- req.aborttime = NS2MS(nowns) + Maxreqtm;
+ req.aborttime = timems() + Maxreqtm;
*l = dnresolve(rp->host->name, Cin, Ta, &req, nil, 0, Recurse, 0, nil);
if(*l == nil)
*l = dnresolve(rp->host->name, Cin, Taaaa, &req,
@@ -345,7 +343,7 @@
memset(&req, 0, sizeof req);
getactivity(&req, 0);
req.isslave = 1;
- req.aborttime = NS2MS(nowns) + Maxreqtm;
+ req.aborttime = timems() + Maxreqtm;
rr = dnresolve(buf, Cin, type, &req, nil, 0, Recurse, rooted, nil);
if(rr){
print("----------------------------\n");
--- a/sys/src/cmd/ndb/dnsgetip.c
+++ b/sys/src/cmd/ndb/dnsgetip.c
@@ -13,7 +13,7 @@
char mntpt[Maxpath];
int needrefresh = 0;
ulong now = 0;
-vlong nowns = 0;
+uvlong nowms = 0;
int traceactivity = 0;
char *zonerefreshprogram;
@@ -36,7 +36,7 @@
memset(&req, 0, sizeof req);
getactivity(&req, 0);
req.isslave = 1;
- req.aborttime = NS2MS(nowns) + Maxreqtm;
+ req.aborttime = timems() + Maxreqtm;
rr = dnresolve(name, Cin, type, &req, nil, 0, Recurse, 0, &status);
neg = rrremneg(&rr);
--- a/sys/src/cmd/ndb/dnstcp.c
+++ b/sys/src/cmd/ndb/dnstcp.c
@@ -19,7 +19,7 @@
char mntpt[Maxpath];
int needrefresh;
ulong now;
-vlong nowns;
+uvlong nowms;
int traceactivity;
char *zonerefreshprogram;
@@ -98,7 +98,6 @@
/* loop on requests */
for(;; putactivity(0)){
- now = time(nil);
memset(&repmsg, 0, sizeof repmsg);
len = readmsg(0, buf, sizeof buf);
if(len <= 0)
@@ -105,7 +104,7 @@
break;
getactivity(&req, 0);
- req.aborttime = timems() + S2MS(15*Min);
+ req.aborttime = timems() + 15*Min*1000;
rcode = 0;
memset(&reqmsg, 0, sizeof reqmsg);
err = convM2DNS(buf, len, &reqmsg, &rcode);
--- a/sys/src/cmd/ndb/dnudpserver.c
+++ b/sys/src/cmd/ndb/dnudpserver.c
@@ -131,7 +131,7 @@
{
volatile int fd, len, op, rcode, served;
char *volatile err;
- volatile char tname[32];
+ volatile char tname[32], ipstr[64];
volatile uchar buf[Udphdrsize + Maxudp + 1024];
volatile DNSmsg reqmsg, repmsg;
Inprogress *volatile p;
@@ -165,8 +165,6 @@
if(setjmp(req.mret))
putactivity(0);
req.isslave = 0;
- req.id = 0;
- req.aborttime = 0;
/* loop on requests */
for(;; putactivity(0)){
@@ -188,16 +186,16 @@
uh = (Udphdr*)buf;
len -= Udphdrsize;
- // dnslog("read received UDP from %I to %I",
- // ((Udphdr*)buf)->raddr, ((Udphdr*)buf)->laddr);
+ // dnslog("read received UDP from %I to %I", uh->raddr, uh->laddr);
+ snprint(ipstr, sizeof(ipstr), "%I", uh->raddr);
getactivity(&req, 0);
req.aborttime = timems() + Maxreqtm;
- req.from = smprint("%I", buf);
- rcode = 0;
+ req.from = ipstr;
served++;
stats.qrecvdudp++;
+ rcode = 0;
err = convM2DNS(&buf[Udphdrsize], len, &reqmsg, &rcode);
if(err){
/* first bytes in buf are source IP addr */
@@ -215,8 +213,7 @@
}
op = reqmsg.flags & Omask;
if(op != Oquery && op != Onotify){
- dnslog("server: op %d from %I", reqmsg.flags & Omask,
- buf);
+ dnslog("server: op %d from %I", reqmsg.flags & Omask, buf);
goto freereq;
}
@@ -267,8 +264,6 @@
p->inuse = 0;
freereq:
- free(req.from);
- req.from = nil;
freeanswers(&reqmsg);
if(req.isslave){
putactivity(0);