ref: 298f2396957bea59cf0985227a6dd903813b5938
parent: b2599999be1d51eedc0e11a15cec9e7fac253250
author: cinap_lenrek <[email protected]>
date: Thu May 10 15:31:58 EDT 2018
ip: add some primitive rate limiting knobs to counteract bufferbloat
--- a/sys/man/3/ip
+++ b/sys/man/3/ip
@@ -194,6 +194,17 @@
The mtu is the maximum size of the packet including any
medium-specific headers.
.TP
+.BI speed\ n
+Set the maximum transmit speed in bits per second.
+.TP
+.BI delay\ n
+Set the maximum burst delay in milliseconds. (Default is 40ms)
+When
+.B speed
+has been set and packets in flight exceed the maximum burst
+delay then packets sent on the interface are discarded until
+the load drops below the maximum.
+.TP
.BI iprouting\ n
Allow
.RI ( n
--- a/sys/src/9/ip/arp.c
+++ b/sys/src/9/ip/arp.c
@@ -303,7 +303,7 @@
freeblistchain(next);
break;
}
- ifc->m->bwrite(ifc, concatblock(bp), version, ip);
+ ipifcoput(ifc, bp, version, ip);
poperror();
}
return 1;
--- a/sys/src/9/ip/ip.c
+++ b/sys/src/9/ip/ip.c
@@ -180,6 +180,7 @@
runlock(ifc);
nexterror();
}
+
if(ifc->m == nil)
goto raise;
@@ -196,7 +197,7 @@
eh->cksum[0] = 0;
eh->cksum[1] = 0;
hnputs(eh->cksum, ipcsum(&eh->vihl));
- ifc->m->bwrite(ifc, concatblock(bp), V4, gate);
+ ipifcoput(ifc, bp, V4, gate);
runlock(ifc);
poperror();
return 0;
@@ -280,7 +281,7 @@
feh->cksum[0] = 0;
feh->cksum[1] = 0;
hnputs(feh->cksum, ipcsum(&feh->vihl));
- ifc->m->bwrite(ifc, nb, V4, gate);
+ ipifcoput(ifc, nb, V4, gate);
ip->stats[FragCreates]++;
}
ip->stats[FragOKs]++;
--- a/sys/src/9/ip/ip.h
+++ b/sys/src/9/ip/ip.h
@@ -327,6 +327,12 @@
uchar recvra6; /* flag: recv router advs on this ifc */
Routerparams rp; /* router parameters as in RFC 2461, pp.40—43.
used only if node is router */
+
+ int speed; /* link speed in bits per second */
+ int delay; /* burst delay in ms */
+ int burst; /* burst delay in bytes */
+ int load; /* bytes in flight */
+ ulong ticks;
};
/*
@@ -652,6 +658,7 @@
*/
extern Medium* ipfindmedium(char *name);
extern void addipmedium(Medium *med);
+extern void ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip);
extern int ipforme(Fs*, uchar *addr);
extern int ipismulticast(uchar *ip);
extern Ipifc* findipifc(Fs*, uchar *local, uchar *remote, int type);
--- a/sys/src/9/ip/ipifc.c
+++ b/sys/src/9/ip/ipifc.c
@@ -250,7 +250,7 @@
char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag"
" %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt"
-" %d pktin %lud pktout %lud errin %lud errout %lud\n";
+" %d pktin %lud pktout %lud errin %lud errout %lud speed %d delay %d\n";
char slineformat[] = " %-40I %-10M %-40I %-12lud %-12lud\n";
@@ -267,7 +267,8 @@
ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
- ifc->in, ifc->out, ifc->inerr, ifc->outerr);
+ ifc->in, ifc->out, ifc->inerr, ifc->outerr,
+ ifc->speed, ifc->delay);
rlock(ifc);
for(lifc = ifc->lifc; lifc != nil && n > m; lifc = lifc->next)
@@ -309,6 +310,50 @@
return ifc->m != nil;
}
+static void
+ipifcsetdelay(Ipifc *ifc, int delay) /* store the burst delay and derive ifc->burst from it */
+{
+ if(delay < 0) /* clamp delay to the range 0..1000 */
+ delay = 0;
+ else if(delay > 1000)
+ delay = 1000;
+ ifc->delay = delay;
+ ifc->burst = ((vlong)delay * ifc->speed) / 8000; /* ms * bits/s / 8000 = bytes; product is computed as vlong */
+ if(ifc->burst < ifc->maxtu) /* burst is never set below maxtu */
+ ifc->burst = ifc->maxtu;
+}
+
+static void
+ipifcsetspeed(Ipifc *ifc, int speed) /* store the transmit speed, reset the load and recompute the burst */
+{
+ if(speed < 0) /* negative speeds are stored as 0 */
+ speed = 0;
+ ifc->speed = speed;
+ ifc->load = 0; /* forget the load accumulated so far */
+ ipifcsetdelay(ifc, ifc->delay); /* burst depends on speed: recompute it with the current delay */
+}
+
+void
+ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip) /* pass bp to the medium's bwrite, or free it when the load exceeds the burst */
+{
+ if(ifc->speed){ /* the rate check is skipped while speed is 0 */
+ ulong now = MACHP(0)->ticks;
+ int dt = TK2MS(now - ifc->ticks); /* time since ifc->ticks was last recorded; TK2MS presumably yields ms */
+ ifc->ticks = now;
+ ifc->load -= ((vlong)dt * ifc->speed) / 8000; /* subtract dt * speed / 8000 (bytes per dt at speed bits/s) */
+ if(ifc->load < 0 || dt < 0 || dt > 1000) /* load went negative or dt is outside 0..1000: restart from zero */
+ ifc->load = 0;
+ else if(ifc->load > ifc->burst){ /* over the burst limit: free the packet and return without sending */
+ freeblist(bp);
+ return;
+ }
+ }
+ bp = concatblock(bp);
+ ifc->load += BLEN(bp); /* add this packet's length to the load */
+ ifc->m->bwrite(ifc, bp, version, ip);
+}
+
+
/*
* called when a process writes to an interface's 'data'
*/
@@ -358,6 +403,8 @@
ifc->m = nil;
ifc->reflect = 0;
ifc->reassemble = 0;
+ ipifcsetspeed(ifc, 0);
+ ipifcsetdelay(ifc, 40);
}
/*
@@ -772,6 +819,14 @@
return ipifcunbind(ifc);
else if(strcmp(argv[0], "mtu") == 0)
return ipifcsetmtu(ifc, argv, argc);
+ else if(strcmp(argv[0], "speed") == 0){
+ ipifcsetspeed(ifc, argc>1? atoi(argv[1]): 0);
+ return nil;
+ }
+ else if(strcmp(argv[0], "delay") == 0){
+ ipifcsetdelay(ifc, argc>1? atoi(argv[1]): 0);
+ return nil;
+ }
else if(strcmp(argv[0], "iprouting") == 0){
iprouting(c->p->f, argc>1? atoi(argv[1]): 1);
return nil;
--- a/sys/src/9/ip/ipv6.c
+++ b/sys/src/9/ip/ipv6.c
@@ -103,7 +103,7 @@
medialen = ifc->maxtu - ifc->m->hsize;
if(len <= medialen) {
hnputs(eh->ploadlen, len - IP6HDR);
- ifc->m->bwrite(ifc, concatblock(bp), V6, gate);
+ ipifcoput(ifc, bp, V6, gate);
runlock(ifc);
poperror();
return 0;
@@ -193,8 +193,7 @@
if(xp->rp == xp->wp)
xp = xp->next;
}
-
- ifc->m->bwrite(ifc, nb, V6, gate);
+ ipifcoput(ifc, nb, V6, gate);
ip->stats[FragCreates]++;
}
ip->stats[FragOKs]++;