shithub: riscv

Download patch

ref: 298f2396957bea59cf0985227a6dd903813b5938
parent: b2599999be1d51eedc0e11a15cec9e7fac253250
author: cinap_lenrek <[email protected]>
date: Thu May 10 15:31:58 EDT 2018

ip: add some primitive rate limiting knobs to counteract bufferbloat

--- a/sys/man/3/ip
+++ b/sys/man/3/ip
@@ -194,6 +194,17 @@
 The mtu is the maximum size of the packet including any
 medium-specific headers.
 .TP
+.BI speed\  n
+Set the maximum transmit speed in bits per second.
+TP
+.BI delay\  n
+Set the maximum burst delay in milliseconds. (Default is 40ms)
+When
+.B speed
+has been set and packets in flight exceed the maximum burst
+delay then packets send on the interface are discarded until
+the load drops below the maximum.
+.TP
 .BI iprouting\  n
 Allow
 .RI ( n
--- a/sys/src/9/ip/arp.c
+++ b/sys/src/9/ip/arp.c
@@ -303,7 +303,7 @@
 					freeblistchain(next);
 					break;
 				}
-				ifc->m->bwrite(ifc, concatblock(bp), version, ip);
+				ipifcoput(ifc, bp, version, ip);
 				poperror();
 			}
 			return 1;
--- a/sys/src/9/ip/ip.c
+++ b/sys/src/9/ip/ip.c
@@ -180,6 +180,7 @@
 		runlock(ifc);
 		nexterror();
 	}
+
 	if(ifc->m == nil)
 		goto raise;
 
@@ -196,7 +197,7 @@
 		eh->cksum[0] = 0;
 		eh->cksum[1] = 0;
 		hnputs(eh->cksum, ipcsum(&eh->vihl));
-		ifc->m->bwrite(ifc, concatblock(bp), V4, gate);
+		ipifcoput(ifc, bp, V4, gate);
 		runlock(ifc);
 		poperror();
 		return 0;
@@ -280,7 +281,7 @@
 		feh->cksum[0] = 0;
 		feh->cksum[1] = 0;
 		hnputs(feh->cksum, ipcsum(&feh->vihl));
-		ifc->m->bwrite(ifc, nb, V4, gate);
+		ipifcoput(ifc, nb, V4, gate);
 		ip->stats[FragCreates]++;
 	}
 	ip->stats[FragOKs]++;
--- a/sys/src/9/ip/ip.h
+++ b/sys/src/9/ip/ip.h
@@ -327,6 +327,12 @@
 	uchar	recvra6;	/* flag: recv router advs on this ifc */
 	Routerparams rp;	/* router parameters as in RFC 2461, pp.40—43.
 					used only if node is router */
+
+	int	speed;		/* link speed in bits per second */
+	int	delay;		/* burst delay in ms */
+	int	burst;		/* burst delay in bytes */
+	int	load;		/* bytes in flight */
+	ulong	ticks;
 };
 
 /*
@@ -652,6 +658,7 @@
  */
 extern Medium*	ipfindmedium(char *name);
 extern void	addipmedium(Medium *med);
+extern void	ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip);
 extern int	ipforme(Fs*, uchar *addr);
 extern int	ipismulticast(uchar *ip);
 extern Ipifc*	findipifc(Fs*, uchar *local, uchar *remote, int type);
--- a/sys/src/9/ip/ipifc.c
+++ b/sys/src/9/ip/ipifc.c
@@ -250,7 +250,7 @@
 
 char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag"
 " %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt"
-" %d pktin %lud pktout %lud errin %lud errout %lud\n";
+" %d pktin %lud pktout %lud errin %lud errout %lud speed %d delay %d\n";
 
 char slineformat[] = "	%-40I %-10M %-40I %-12lud %-12lud\n";
 
@@ -267,7 +267,8 @@
 		ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
 		ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
 		ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
-		ifc->in, ifc->out, ifc->inerr, ifc->outerr);
+		ifc->in, ifc->out, ifc->inerr, ifc->outerr,
+		ifc->speed, ifc->delay);
 
 	rlock(ifc);
 	for(lifc = ifc->lifc; lifc != nil && n > m; lifc = lifc->next)
@@ -309,6 +310,50 @@
 	return ifc->m != nil;
 }
 
+static void
+ipifcsetdelay(Ipifc *ifc, int delay)
+{
+	if(delay < 0)
+		delay = 0;
+	else if(delay > 1000)
+		delay = 1000;
+	ifc->delay = delay;
+	ifc->burst = ((vlong)delay * ifc->speed) / 8000;
+	if(ifc->burst < ifc->maxtu)
+		ifc->burst = ifc->maxtu;
+}
+
+static void
+ipifcsetspeed(Ipifc *ifc, int speed)
+{
+	if(speed < 0)
+		speed = 0;
+	ifc->speed = speed;
+	ifc->load = 0;
+	ipifcsetdelay(ifc, ifc->delay);
+}
+
+void
+ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip)
+{
+	if(ifc->speed){
+		ulong now = MACHP(0)->ticks;
+		int dt = TK2MS(now - ifc->ticks);
+		ifc->ticks = now;
+		ifc->load -= ((vlong)dt * ifc->speed) / 8000;
+		if(ifc->load < 0 || dt < 0 || dt > 1000)
+			ifc->load = 0;
+		else if(ifc->load > ifc->burst){
+			freeblist(bp);
+			return;
+		}
+	}
+	bp = concatblock(bp);
+	ifc->load += BLEN(bp);
+	ifc->m->bwrite(ifc, bp, version, ip);
+}
+
+
 /*
  *  called when a process writes to an interface's 'data'
  */
@@ -358,6 +403,8 @@
 	ifc->m = nil;
 	ifc->reflect = 0;
 	ifc->reassemble = 0;
+	ipifcsetspeed(ifc, 0);
+	ipifcsetdelay(ifc, 40);
 }
 
 /*
@@ -772,6 +819,14 @@
 		return ipifcunbind(ifc);
 	else if(strcmp(argv[0], "mtu") == 0)
 		return ipifcsetmtu(ifc, argv, argc);
+	else if(strcmp(argv[0], "speed") == 0){
+		ipifcsetspeed(ifc, argc>1? atoi(argv[1]): 0);
+		return nil;
+	}
+	else if(strcmp(argv[0], "delay") == 0){
+		ipifcsetdelay(ifc, argc>1? atoi(argv[1]): 0);
+		return nil;
+	}
 	else if(strcmp(argv[0], "iprouting") == 0){
 		iprouting(c->p->f, argc>1? atoi(argv[1]): 1);
 		return nil;
--- a/sys/src/9/ip/ipv6.c
+++ b/sys/src/9/ip/ipv6.c
@@ -103,7 +103,7 @@
 	medialen = ifc->maxtu - ifc->m->hsize;
 	if(len <= medialen) {
 		hnputs(eh->ploadlen, len - IP6HDR);
-		ifc->m->bwrite(ifc, concatblock(bp), V6, gate);
+		ipifcoput(ifc, bp, V6, gate);
 		runlock(ifc);
 		poperror();
 		return 0;
@@ -193,8 +193,7 @@
 			if(xp->rp == xp->wp)
 				xp = xp->next;
 		}
-
-		ifc->m->bwrite(ifc, nb, V6, gate);
+		ipifcoput(ifc, nb, V6, gate);
 		ip->stats[FragCreates]++;
 	}
 	ip->stats[FragOKs]++;