ref: d2a7d886624c56673a6d7ba7d6a7958d2be5b867
parent: c14ea9fdd1521ff9322f9af71b801e016622c0cd
author: cinap_lenrek <[email protected]>
date: Sat Mar 12 15:53:17 EST 2022
devip: implement network address translation routes This adds a new route "t"-flag that enables network address translation, replacing the source address (and local port) of a forwarded packet with one of the outgoing interface. The state for a translation is kept in a new Translation structure, which contains two Iphash entries, so it can be inserted into the per protocol 4-tuple hash table, requiring no extra lookups. Translations have a low overhead (~200 bytes on amd64), so we can have many of them. They get reused after 5 minutes of inactivity or when the per protocol limit of 1000 entries is reached (then the one with longest inactivity is reused). The protocol needs to export a "forward" function that is responsible for modifying the forwarded packet, and then handle translations in its input function for iphash hits with Iphash.trans != 0. This patch also fixes a few minor things found during development: - Include the Iphash in the Conv structure, avoiding extra malloc - Fix ttl exceeded check (ttl < 1 -> ttl <= 1) - Router should not reply with ttl exceeded for multicast flows - Extra checks for icmp advice to avoid protocol confusions.
--- a/sys/man/3/ip
+++ b/sys/man/3/ip
@@ -411,6 +411,9 @@
.TP
.B p
point-to-point route
+.TP
+.B t
+network address translation on source
.PD
.PP
The tag is an arbitrary, up to 4 character, string. It is normally used to
@@ -442,7 +445,7 @@
.TP
.BI add\ "target mask nexthop tag interface source smask"
.TP
-.BI add\ "target mask nexthop type tag interface source smask"
+.BI add\ "target mask nexthop flags tag interface source smask"
Add the route to the table. If one already exists with the
same target and mask, replace it. The
.I interface
@@ -461,7 +464,7 @@
.TP
.BI remove\ "target mask nexthop tag interface source smask"
.TP
-.BI remove\ "target mask nexthop type tag interface source smask"
+.BI remove\ "target mask nexthop flags tag interface source smask"
Remove the matching route.
.
.SS "Address resolution
--- a/sys/src/9/ip/devip.c
+++ b/sys/src/9/ip/devip.c
@@ -737,6 +737,7 @@
char*
setluniqueport(Conv* c, int lport)
{
+ Translation *q;
Proto *p;
Conv *xp;
int x;
@@ -754,14 +755,22 @@
&& xp->lport == lport
&& xp->rport == c->rport
&& ipcmp(xp->raddr, c->raddr) == 0
- && ipcmp(xp->laddr, c->laddr) == 0){
- qunlock(p);
- return "address in use";
- }
+ && ipcmp(xp->laddr, c->laddr) == 0)
+ goto Inuse;
}
+ for(q = p->translations; q != nil; q = q->next){
+ if(q->backward.lport == lport
+ && q->backward.rport == c->rport
+ && ipcmp(q->backward.raddr, c->raddr) == 0
+ && ipcmp(q->backward.laddr, c->laddr) == 0)
+ goto Inuse;
+ }
c->lport = lport;
qunlock(p);
return nil;
+Inuse:
+ qunlock(p);
+ return "address in use";
}
/*
@@ -770,18 +779,51 @@
static int
lportinuse(Proto *p, ushort lport)
{
+ Translation *q;
int x;
for(x = 0; x < p->nc && p->conv[x]; x++)
if(p->conv[x]->lport == lport)
return 1;
+ for(q = p->translations; q != nil; q = q->next)
+ if(q->backward.lport == lport)
+ return 1;
return 0;
}
+/*
+ * find an unused local port for a protocol.
+ *
+ * p needs to be locked
+ */
+int
+unusedlport(Proto *p)
+{
+ ushort port;
+ int i;
+
+ /*
+ * Unrestricted ports are chosen randomly
+ * between 2^15 and 2^16. There are at most
+ * 4*Nchan = 4096 ports in use at any given time,
+ * so even in the worst case, a random probe has a
+ * 1 - 4096/2^15 = 87% chance of success.
+ * If 64 successive probes fail, there is a bug somewhere
+ * (or a once in 10^58 event has happened, but that's
+ * less likely than a venti collision).
+ */
+ for(i=0; i<64; i++){
+ port = (1<<15) + nrand(1<<15);
+ if(!lportinuse(p, port))
+ return port;
+ }
+ return -1;
+}
+
/*
* pick a local port and set it
*/
-char *
+static char *
setlport(Conv* c)
{
Proto *p;
@@ -799,21 +841,9 @@
goto chosen;
}
}else{
- /*
- * Unrestricted ports are chosen randomly
- * between 2^15 and 2^16. There are at most
- * 4*Nchan = 4096 ports in use at any given time,
- * so even in the worst case, a random probe has a
- * 1 - 4096/2^15 = 87% chance of success.
- * If 64 successive probes fail, there is a bug somewhere
- * (or a once in 10^58 event has happened, but that's
- * less likely than a venti collision).
- */
- for(i=0; i<64; i++){
- port = (1<<15) + nrand(1<<15);
- if(!lportinuse(p, port))
- goto chosen;
- }
+ port = unusedlport(p);
+ if(port > 0)
+ goto chosen;
}
qunlock(p);
return "no ports available";
--- a/sys/src/9/ip/icmp.c
+++ b/sys/src/9/ip/icmp.c
@@ -99,6 +99,8 @@
/* message counts */
ulong in[Maxtype+1];
ulong out[Maxtype+1];
+
+ Ipht ht;
};
static void icmpkick(void *x, Block*);
@@ -192,9 +194,9 @@
uchar addr[IPaddrlen];
int i;
- v4tov6(addr, ip4);
- if(ipismulticast(addr))
+ if(isv4mcast(ip4))
return 0;
+ v4tov6(addr, ip4);
i = ipforme(f, addr);
return i == 0 || i == Runi;
}
@@ -204,9 +206,9 @@
{
uchar addr[IPaddrlen];
- v4tov6(addr, ip4);
- if(ipismulticast(addr))
+ if(isv4mcast(ip4))
return 0;
+ v4tov6(addr, ip4);
return ipforme(f, addr) == Runi;
}
@@ -218,7 +220,7 @@
uchar ia[IPv4addrlen];
p = (Icmp *)bp->rp;
- if(!ip4reply(f, p->src) || !ipv4local(ifc, ia, 0, p->src))
+ if(isv4mcast(p->dst) || !ip4reply(f, p->src) || !ipv4local(ifc, ia, 0, p->src))
return;
netlog(f, Logicmp, "sending icmpttlexceeded %V -> src %V dst %V\n",
@@ -249,7 +251,7 @@
uchar ia[IPv4addrlen];
p = (Icmp *)bp->rp;
- if(!ip4reply(f, p->src))
+ if(isv4mcast(p->dst) || !ip4reply(f, p->src))
return;
if(ifc == nil){
@@ -302,21 +304,43 @@
static void
goticmpkt(Proto *icmp, Block *bp)
{
- ushort recid;
uchar dst[IPaddrlen], src[IPaddrlen];
+ ushort recid;
Conv **c, *s;
+ Iphash *iph;
Icmp *p;
-
- p = (Icmp *) bp->rp;
+
+ p = (Icmp *)bp->rp;
v4tov6(dst, p->dst);
v4tov6(src, p->src);
recid = nhgets(p->icmpid);
+ qlock(icmp);
+ iph = iphtlook(&((Icmppriv*)icmp->priv)->ht, src, recid, dst, recid);
+ if(iph != nil){
+ Translation *q;
+ int hop = p->ttl;
+
+ if(hop <= 1 || (q = transbackward(icmp, iph)) == nil)
+ goto raise;
+ memmove(p->dst, q->forward.raddr+IPv4off, IPv4addrlen);
+ hnputs_csum(p->icmpid, q->forward.rport, p->cksum);
+
+ /* only use route-hint when from original destination */
+ if(memcmp(p->src, q->forward.laddr+IPv4off, IPv4addrlen) != 0)
+ q = nil;
+ qunlock(icmp);
+
+ ipoput4(icmp->f, bp, 1, hop - 1, p->tos, q);
+ return;
+ }
for(c = icmp->conv; (s = *c) != nil; c++){
if(s->lport == recid)
if(ipcmp(s->laddr, dst) == 0 || ipcmp(s->raddr, src) == 0)
qpass(s->rq, copyblock(bp, blocklen(bp)));
}
+raise:
+ qunlock(icmp);
freeblist(bp);
}
@@ -404,6 +428,13 @@
ipriv->out[EchoReply]++;
ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
break;
+ case TimeExceed:
+ if(p->code == 0){
+ snprint(msg = m2, sizeof m2, "ttl exceeded at %V", p->src);
+ goto Advise;
+ }
+ goticmpkt(icmp, bp);
+ break;
case Unreachable:
if(p->code >= nelem(unreachcode)) {
snprint(m2, sizeof m2, "unreachable %V -> %V code %d",
@@ -411,7 +442,6 @@
msg = m2;
} else
msg = unreachcode[p->code];
-
Advise:
bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
if(BLEN(bp) < MinAdvise){
@@ -419,40 +449,80 @@
goto raise;
}
p = (Icmp *)bp->rp;
- if((nhgets(p->frag) & IP_FO) == 0){
+ if(p->vihl == (IP_VER4|IP_HLEN4) /* advise() does not expect options */
+ && (nhgets(p->frag) & IP_FO) == 0 /* first fragment */
+ && ipcsum(&p->vihl) == 0){
pr = Fsrcvpcolx(icmp->f, p->proto);
if(pr != nil && pr->advise != nil) {
+ netlog(icmp->f, Logicmp, "advising %s!%V -> %V: %s\n", pr->name, p->src, p->dst, msg);
(*pr->advise)(pr, bp, msg);
return;
}
}
bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
- goticmpkt(icmp, bp);
- break;
- case TimeExceed:
- if(p->code == 0){
- snprint(msg = m2, sizeof m2, "ttl exceeded at %V", p->src);
- goto Advise;
- }
- goticmpkt(icmp, bp);
- break;
+ /* wet floor: deliberate fall through */
default:
goticmpkt(icmp, bp);
break;
}
return;
-
raise:
freeblist(bp);
}
+/*
+ * called from protocol advice handlers when the advice
+ * is actually for someone we source translate (ip4).
+ * the caller has fixed up the ip address and ports
+ * in the inner header, so we just restore the outer
+ * ip/icmp headers, recalculating icmp checksum
+ * and send the advice to ip4.
+ */
+void
+icmpproxyadvice(Fs *f, Block *bp, uchar *ip4)
+{
+ Icmp *p;
+ int hop;
+
+ /* inner header */
+ p = (Icmp *) bp->rp;
+ if(p->vihl != (IP_VER4|IP_HLEN4))
+ goto drop;
+ if(ipcsum(&p->vihl) != 0)
+ goto drop;
+
+ /* outer header */
+ bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+ p = (Icmp *) bp->rp;
+ if(p->vihl != (IP_VER4|(ICMP_IPSIZE>>2)))
+ goto drop;
+
+ hop = p->ttl;
+ if(hop <= 1)
+ goto drop;
+
+ netlog(f, Logicmp|Logtrans, "proxying icmp advice from %V to %V->%V\n",
+ p->src, p->dst, ip4);
+ memmove(p->dst, ip4, IPv4addrlen);
+
+ /* recalculate ICMP checksum */
+ memset(p->cksum, 0, sizeof(p->cksum));
+ hnputs(p->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE));
+
+ ipoput4(f, bp, 1, hop - 1, p->tos, nil);
+ return;
+drop:
+ freeblist(bp);
+}
+
static void
icmpadvise(Proto *icmp, Block *bp, char *msg)
{
- ushort recid;
uchar dst[IPaddrlen], src[IPaddrlen];
+ ushort recid;
Conv **c, *s;
Icmp *p;
+ Iphash *iph;
p = (Icmp *) bp->rp;
v4tov6(dst, p->dst);
@@ -459,6 +529,23 @@
v4tov6(src, p->src);
recid = nhgets(p->icmpid);
+ qlock(icmp);
+ iph = iphtlook(&((Icmppriv*)icmp->priv)->ht, dst, recid, src, recid);
+ if(iph != nil){
+ Translation *q;
+
+ if((q = transbackward(icmp, iph)) == nil)
+ goto raise;
+
+ hnputs_csum(p->src+0, nhgets(q->forward.raddr+IPv4off+0), p->ipcksum);
+ hnputs_csum(p->src+2, nhgets(q->forward.raddr+IPv4off+2), p->ipcksum);
+
+ hnputs_csum(p->icmpid, q->forward.rport, p->cksum);
+ qunlock(icmp);
+
+ icmpproxyadvice(icmp->f, bp, p->src);
+ return;
+ }
for(c = icmp->conv; (s = *c) != nil; c++){
if(s->lport == recid)
if(ipcmp(s->laddr, src) == 0)
@@ -470,9 +557,38 @@
break;
}
}
+raise:
+ qunlock(icmp);
freeblist(bp);
}
+static Block*
+icmpforward(Proto *icmp, Block *bp, Route *r)
+{
+ uchar da[IPaddrlen], sa[IPaddrlen];
+ ushort id;
+ Icmp *p;
+ Translation *q;
+
+ p = (Icmp*)(bp->rp);
+ v4tov6(sa, p->src);
+ v4tov6(da, p->dst);
+ id = nhgets(p->icmpid);
+
+ qlock(icmp);
+ q = transforward(icmp, &((Icmppriv*)icmp->priv)->ht, sa, id, da, id, r);
+ if(q == nil){
+ qunlock(icmp);
+ freeblist(bp);
+ return nil;
+ }
+ memmove(p->src, q->backward.laddr+IPv4off, IPv4addrlen);
+ hnputs_csum(p->icmpid, q->backward.lport, p->cksum);
+ qunlock(icmp);
+
+ return bp;
+}
+
static int
icmpstats(Proto *icmp, char *buf, int len)
{
@@ -511,6 +627,7 @@
icmp->stats = icmpstats;
icmp->ctl = nil;
icmp->advise = icmpadvise;
+ icmp->forward = icmpforward;
icmp->gc = nil;
icmp->ipproto = IP_ICMPPROTO;
icmp->nc = 128;
--- a/sys/src/9/ip/icmp6.c
+++ b/sys/src/9/ip/icmp6.c
@@ -711,9 +711,8 @@
goto raise;
}
p = (IPICMP *)bp->rp;
-
/* get rid of fragment header if this is the first fragment */
- if(p->proto == FH && BLEN(bp) >= MinAdvise+IP6FHDR && MinAdvise > IP6HDR){
+ if((p->vcf[0] & 0xF0) == IP_VER6 && p->proto == FH && BLEN(bp) >= MinAdvise+IP6FHDR && MinAdvise > IP6HDR){
Fraghdr6 *fh = (Fraghdr6*)(bp->rp + IP6HDR);
if((nhgets(fh->offsetRM) & ~7) == 0){ /* first fragment */
p->proto = fh->nexthdr;
@@ -725,9 +724,10 @@
bp->rp -= IP6HDR;
}
}
- if(p->proto != FH){
+ if((p->vcf[0] & 0xF0) == IP_VER6 && p->proto != FH){
pr = Fsrcvpcolx(icmp->f, p->proto);
if(pr != nil && pr->advise != nil) {
+ netlog(icmp->f, Logicmp, "advising %s!%I -> %I: %s\n", pr->name, p->src, p->dst, msg);
(*pr->advise)(pr, bp, msg);
return;
}
--- a/sys/src/9/ip/il.c
+++ b/sys/src/9/ip/il.c
@@ -308,8 +308,8 @@
ic = (Ilcb*)c->ptcl;
ic->state = Ilclosed;
iphtrem(&ipriv->ht, c);
- ipmove(c->laddr, IPnoaddr);
c->lport = 0;
+ ipmove(c->laddr, IPnoaddr);
}
static void
@@ -544,6 +544,7 @@
uchar laddr[IPaddrlen];
ushort sp, dp, csum;
int plen, illen;
+ Iphash *iph;
Conv *new, *s;
Ilpriv *ipriv;
@@ -584,14 +585,14 @@
}
qlock(il);
- s = iphtlook(&ipriv->ht, raddr, dp, laddr, sp);
- if(s == nil){
+ iph = iphtlook(&ipriv->ht, raddr, dp, laddr, sp);
+ if(iph == nil){
if(ih->iltype == Ilsync)
ilreject(il->f, ih); /* no listener */
qunlock(il);
goto raise;
}
-
+ s = iphconv(iph);
ic = (Ilcb*)s->ptcl;
if(ic->state == Illistening){
if(ih->iltype != Ilsync){
--- a/sys/src/9/ip/ip.c
+++ b/sys/src/9/ip/ip.c
@@ -252,7 +252,7 @@
void
ipiput4(Fs *f, Ipifc *ifc, Block *bp)
{
- int hl, len, hop, tos;
+ int hl, len, hop;
uchar v6dst[IPaddrlen];
ushort frag;
Ip4hdr *h;
@@ -327,27 +327,53 @@
/* don't forward if packet has timed out */
hop = h->ttl;
- if(hop < 1) {
+ if(hop <= 1) {
ip->stats[InHdrErrors]++;
icmpttlexceeded(f, ifc, bp);
goto drop;
}
- /* reassemble if the interface expects it */
- if(nifc->reassemble){
+ if(r->type & Rtrans) {
+ p = Fsrcvpcolx(f, h->proto);
+ if(p == nil || p->forward == nil){
+ ip->stats[OutDiscards]++;
+ goto drop;
+ }
+
+ if(hl > IP4HDR) {
+ hl -= IP4HDR;
+ len -= hl;
+ bp->rp += hl;
+ memmove(bp->rp, h, IP4HDR);
+ h = (Ip4hdr*)bp->rp;
+ h->vihl = IP_VER4|IP_HLEN4;
+ hnputs(h->length, len);
+ }
+
frag = nhgets(h->frag);
if(frag & (IP_MF|IP_FO)) {
bp = ip4reassemble(ip, frag, bp);
if(bp == nil)
return;
+ }
+
+ bp = (*p->forward)(p, bp, r);
+ if(bp == nil)
+ return;
+ h = (Ip4hdr*)bp->rp;
+ } else if(nifc->reassemble) {
+ /* reassemble as the interface expects it */
+ frag = nhgets(h->frag);
+ if(frag & (IP_MF|IP_FO)) {
+ bp = ip4reassemble(ip, frag, bp);
+ if(bp == nil)
+ return;
h = (Ip4hdr*)bp->rp;
}
}
ip->stats[ForwDatagrams]++;
- tos = h->tos;
- hop = h->ttl;
- ipoput4(f, bp, 1, hop - 1, tos, &rh);
+ ipoput4(f, bp, 1, hop - 1, h->tos, &rh);
return;
}
--- a/sys/src/9/ip/ip.h
+++ b/sys/src/9/ip/ip.h
@@ -22,6 +22,7 @@
typedef struct Arp Arp;
typedef struct Route Route;
typedef struct Routehint Routehint;
+typedef struct Translation Translation;
typedef struct Routerparams Routerparams;
typedef struct Hostparams Hostparams;
@@ -178,6 +179,71 @@
};
/*
+ * hash table for 2 ip addresses + 2 ports
+ */
+enum
+{
+ Nipht= 521, /* convenient prime */
+
+ IPmatchexact= 0, /* match on 4 tuple */
+ IPmatchany, /* *!* */
+ IPmatchport, /* *!port */
+ IPmatchaddr, /* addr!* */
+ IPmatchpa, /* addr!port */
+};
+
+struct Iphash
+{
+ Iphash *nextiphash;
+
+ uchar trans; /* 0 = conv, 1 = forward, 2 = backward */
+ uchar match;
+ ushort lport; /* local port number */
+ ushort rport; /* remote port number */
+ uchar laddr[IPaddrlen]; /* local IP address */
+ uchar raddr[IPaddrlen]; /* remote IP address */
+};
+
+struct Ipht
+{
+ Lock;
+ Iphash *tab[Nipht];
+};
+
+void iphtadd(Ipht*, Iphash*);
+void iphtrem(Ipht*, Iphash*);
+Iphash *iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp);
+
+/*
+ * NAT entry.
+ *
+ * This holds the 5 tuple as two Iphashes.
+ * The "forward" hash matches the packets from
+ * the source that need to be translated and
+ * "backward" matches the packets coming back
+ * from the destination.
+ */
+struct Translation
+{
+ Translation *next;
+ Translation **link;
+
+ ulong time;
+
+ Iphash forward;
+#define iphforward(h) ((Translation*)((char*)(h) - (char*)&((Translation*)0)->forward))
+
+ Iphash backward;
+#define iphbackward(h) ((Translation*)((char*)(h) - (char*)&((Translation*)0)->backward))
+
+ /* used for forwarding to the source */
+ Routehint;
+};
+
+Translation *transforward(Proto *p, Ipht *ht, uchar *sa, int sp, uchar *da, int dp, Route *r);
+Translation *transbackward(Proto *p, Iphash *iph);
+
+/*
* one per conversation directory
*/
struct Conv
@@ -187,17 +253,16 @@
int x; /* conversation index */
Proto* p;
- int restricted; /* remote port is restricted */
- int ignoreadvice; /* don't terminate connection on icmp errors */
uint ttl; /* max time to live */
uint tos; /* type of service */
+ uchar restricted; /* remote port is restricted */
+ uchar ignoreadvice; /* don't terminate connection on icmp errors */
uchar ipversion;
- uchar laddr[IPaddrlen]; /* local IP address */
- uchar raddr[IPaddrlen]; /* remote IP address */
- ushort lport; /* local port number */
- ushort rport; /* remote port number */
+ Iphash;
+#define iphconv(h) ((Conv*)((char*)(h) - (char*)&((Conv*)0)->Iphash))
+
char *owner; /* protections */
int perm;
int inuse; /* opens of listen/data/ctl */
@@ -206,7 +271,6 @@
/* udp specific */
int headers; /* data src/dst headers in udp */
- int reliable; /* true if reliable udp */
Conv* incall; /* calls waiting to be listened for */
Conv* next;
@@ -352,34 +416,6 @@
};
/*
- * hash table for 2 ip addresses + 2 ports
- */
-enum
-{
- Nipht= 521, /* convenient prime */
-
- IPmatchexact= 0, /* match on 4 tuple */
- IPmatchany, /* *!* */
- IPmatchport, /* *!port */
- IPmatchaddr, /* addr!* */
- IPmatchpa, /* addr!port */
-};
-struct Iphash
-{
- Iphash *next;
- Conv *c;
- int match;
-};
-struct Ipht
-{
- Lock;
- Iphash *tab[Nipht];
-};
-void iphtadd(Ipht*, Conv*);
-void iphtrem(Ipht*, Conv*);
-Conv* iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp);
-
-/*
* one per multiplexed protocol
*/
struct Proto
@@ -412,10 +448,16 @@
Qid qid; /* qid for protocol directory */
ushort nextrport;
+ /* network address translation */
+ Translation* translations;
+ Block* (*forward)(Proto*, Block*, Route*);
+
void *priv;
};
+int unusedlport(Proto *p);
+
/*
* one per IP protocol stack
*/
@@ -489,6 +531,7 @@
Logrudpmsg= 1<<16,
Logesp= 1<<17,
Logtcpwin= 1<<18,
+ Logtrans= 1<<19,
};
void netloginit(Fs*);
@@ -522,7 +565,9 @@
Rbcast= (1<<4), /* a broadcast self address */
Rmulti= (1<<5), /* a multicast self address */
Rproxy= (1<<6), /* this route should be proxied */
- Rsrc= (1<<7), /* source specific route */
+ Rtrans= (1<<7), /* this route translates source address (NAT) */
+
+ Rsrc= (1<<8), /* source specific route */
};
struct RouteTree
@@ -533,7 +578,7 @@
Ipifc *ifc;
uchar ifcid; /* must match ifc->id */
uchar depth;
- uchar type;
+ ushort type;
char tag[4];
int ref;
};
@@ -641,10 +686,13 @@
extern void v4tov6(uchar *v6, uchar *v4);
extern int v6tov4(uchar *v4, uchar *v6);
extern int eipfmt(Fmt*);
+extern int ipismulticast(uchar *ip);
extern int convipvers(Conv *c);
+extern void hnputs_csum(void *p, ushort v, uchar *pcsum);
#define ipmove(x, y) memmove(x, y, IPaddrlen)
#define ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) )
+#define isv4mcast(ip4) ((ip4)[0] >= 0xe0 && (ip4)[0] < 0xf0)
extern uchar IPv4bcast[IPaddrlen];
extern uchar IPv4bcastobs[IPaddrlen];
@@ -670,7 +718,6 @@
extern void addipmedium(Medium *med);
extern void ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip, Routehint *rh);
extern int ipforme(Fs*, uchar *addr);
-extern int ipismulticast(uchar *ip);
extern Ipifc* findipifc(Fs*, uchar *local, uchar *remote, int type);
extern Ipifc* findipifcstr(Fs *f, char *s);
extern void findlocalip(Fs*, uchar *local, uchar *remote);
@@ -694,6 +741,8 @@
extern void icmpnoconv(Fs*, Block*);
extern void icmpcantfrag(Fs*, Block*, int);
extern void icmpttlexceeded(Fs*, Ipifc*, Block*);
+extern void icmpproxyadvice(Fs *, Block*, uchar*);
+
extern ushort ipcsum(uchar*);
extern void ipiput4(Fs*, Ipifc*, Block*);
extern void ipiput6(Fs*, Ipifc*, Block*);
--- a/sys/src/9/ip/ipaux.c
+++ b/sys/src/9/ip/ipaux.c
@@ -203,7 +203,6 @@
smcast[15] = a[15];
}
-
/*
* parse a hex mac address
*/
@@ -233,9 +232,36 @@
}
/*
+ * return multicast version if any
+ */
+int
+ipismulticast(uchar *ip)
+{
+ if(isv4(ip)){
+ if(isv4mcast(&ip[IPv4off]))
+ return V4;
+ }
+ else if(isv6mcast(ip))
+ return V6;
+ return 0;
+}
+
+/*
+ * return ip version of a connection
+ */
+int
+convipvers(Conv *c)
+{
+ if(isv4(c->raddr) && isv4(c->laddr) || ipcmp(c->raddr, IPnoaddr) == 0)
+ return V4;
+ else
+ return V6;
+}
+
+/*
* hashing tcp, udp, ... connections
*/
-ulong
+static ulong
iphash(uchar *sa, ushort sp, uchar *da, ushort dp)
{
return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nipht;
@@ -242,136 +268,302 @@
}
void
-iphtadd(Ipht *ht, Conv *c)
+iphtadd(Ipht *ht, Iphash *h)
{
ulong hv;
- Iphash *h;
- hv = iphash(c->raddr, c->rport, c->laddr, c->lport);
- h = smalloc(sizeof(*h));
- if(ipcmp(c->raddr, IPnoaddr) != 0)
+ if(ipcmp(h->raddr, IPnoaddr) != 0)
h->match = IPmatchexact;
else {
- if(ipcmp(c->laddr, IPnoaddr) != 0){
- if(c->lport == 0)
+ if(ipcmp(h->laddr, IPnoaddr) != 0){
+ if(h->lport == 0)
h->match = IPmatchaddr;
else
h->match = IPmatchpa;
} else {
- if(c->lport == 0)
+ if(h->lport == 0)
h->match = IPmatchany;
else
h->match = IPmatchport;
}
}
- h->c = c;
-
lock(ht);
- h->next = ht->tab[hv];
+ hv = iphash(h->raddr, h->rport, h->laddr, h->lport);
+ h->nextiphash = ht->tab[hv];
ht->tab[hv] = h;
unlock(ht);
}
void
-iphtrem(Ipht *ht, Conv *c)
+iphtrem(Ipht *ht, Iphash *h)
{
ulong hv;
- Iphash **l, *h;
+ Iphash **l;
- hv = iphash(c->raddr, c->rport, c->laddr, c->lport);
lock(ht);
- for(l = &ht->tab[hv]; (*l) != nil; l = &(*l)->next)
- if((*l)->c == c){
- h = *l;
- (*l) = h->next;
- free(h);
+ hv = iphash(h->raddr, h->rport, h->laddr, h->lport);
+ for(l = &ht->tab[hv]; (*l) != nil; l = &(*l)->nextiphash)
+ if(*l == h){
+ (*l) = h->nextiphash;
+ h->nextiphash = nil;
break;
}
unlock(ht);
}
-/* look for a matching conversation with the following precedence
- * connected && raddr,rport,laddr,lport
- * announced && laddr,lport
- * announced && *,lport
- * announced && laddr,*
- * announced && *,*
+/* look for a matching iphash with the following precedence
+ * raddr,rport,laddr,lport
+ * laddr,lport
+ * *,lport
+ * laddr,*
+ * *,*
*/
-Conv*
+Iphash*
iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp)
{
ulong hv;
Iphash *h;
- Conv *c;
+ lock(ht);
/* exact 4 pair match (connection) */
hv = iphash(sa, sp, da, dp);
- lock(ht);
- for(h = ht->tab[hv]; h != nil; h = h->next){
+ for(h = ht->tab[hv]; h != nil; h = h->nextiphash){
if(h->match != IPmatchexact)
continue;
- c = h->c;
- if(sp == c->rport && dp == c->lport
- && ipcmp(sa, c->raddr) == 0 && ipcmp(da, c->laddr) == 0){
+ if(sp == h->rport && dp == h->lport
+ && ipcmp(sa, h->raddr) == 0 && ipcmp(da, h->laddr) == 0){
unlock(ht);
- return c;
+ return h;
}
}
/* match local address and port */
hv = iphash(IPnoaddr, 0, da, dp);
- for(h = ht->tab[hv]; h != nil; h = h->next){
+ for(h = ht->tab[hv]; h != nil; h = h->nextiphash){
if(h->match != IPmatchpa)
continue;
- c = h->c;
- if(dp == c->lport && ipcmp(da, c->laddr) == 0){
+ if(dp == h->lport && ipcmp(da, h->laddr) == 0){
unlock(ht);
- return c;
+ return h;
}
}
/* match just port */
hv = iphash(IPnoaddr, 0, IPnoaddr, dp);
- for(h = ht->tab[hv]; h != nil; h = h->next){
+ for(h = ht->tab[hv]; h != nil; h = h->nextiphash){
if(h->match != IPmatchport)
continue;
- c = h->c;
- if(dp == c->lport){
+ if(dp == h->lport){
unlock(ht);
- return c;
+ return h;
}
}
/* match local address */
hv = iphash(IPnoaddr, 0, da, 0);
- for(h = ht->tab[hv]; h != nil; h = h->next){
+ for(h = ht->tab[hv]; h != nil; h = h->nextiphash){
if(h->match != IPmatchaddr)
continue;
- c = h->c;
- if(ipcmp(da, c->laddr) == 0){
+ if(ipcmp(da, h->laddr) == 0){
unlock(ht);
- return c;
+ return h;
}
}
/* look for something that matches anything */
hv = iphash(IPnoaddr, 0, IPnoaddr, 0);
- for(h = ht->tab[hv]; h != nil; h = h->next){
+ for(h = ht->tab[hv]; h != nil; h = h->nextiphash){
if(h->match != IPmatchany)
continue;
- c = h->c;
unlock(ht);
- return c;
+ return h;
}
unlock(ht);
return nil;
}
-int
-convipvers(Conv *c)
+/*
+ * Move entry to front of Proto.translations
+ * and update the timestamp.
+ *
+ * Proto is locked.
+ */
+static Translation*
+transupdate(Proto *p, Translation *q)
{
- if(isv4(c->raddr) && isv4(c->laddr) || ipcmp(c->raddr, IPnoaddr) == 0)
- return V4;
+ q->time = NOW;
+
+ /* unlink */
+ if(q->link != nil && (*q->link = q->next) != nil)
+ q->next->link = q->link;
+
+ /* link to front */
+ if((q->next = p->translations) != nil)
+ q->next->link = &q->next;
+ p->translations = q;
+ q->link = &p->translations;
+
+ return q;
+}
+
+/*
+ * Called with the 4-tuple (sa,sp,da,dp)
+ * that should be source translated,
+ * returning the translation.
+ *
+ * Proto is locked.
+ */
+Translation*
+transforward(Proto *p, Ipht *ht, uchar *sa, int sp, uchar *da, int dp, Route *r)
+{
+ uchar ia[IPaddrlen];
+ Routehint rh;
+ Translation *q;
+ Iphash *iph;
+ Ipifc *ifc;
+ int lport;
+ ulong now;
+ int num;
+
+ /* Translation already exists? */
+ iph = iphtlook(ht, sa, sp, da, dp);
+ if(iph != nil) {
+ if(iph->trans != 1)
+ return nil;
+ return transupdate(p, iphforward(iph));
+ }
+
+ /* Bad source address? */
+ if(ipismulticast(sa) || ipforme(p->f, sa) != 0){
+ netlog(p->f, Logtrans, "trans: bad source address: %s!%I!%d -> %I!%d\n",
+ p->name, sa, sp, da, dp);
+ return nil;
+ }
+
+ /* Bad forward route? */
+ if(r == nil || (ifc = r->ifc) == nil){
+ netlog(p->f, Logtrans, "trans: no forward route: %s!%I!%d -> %I!%d\n",
+ p->name, sa, sp, da, dp);
+ return nil;
+ }
+
+ /* Find a source address on the destination interface */
+ rlock(ifc);
+ memmove(ia, v4prefix, IPv4off);
+ if(!ipv4local(ifc, ia+IPv4off, 0, (r->type & (Rifc|Runi|Rbcast|Rmulti))? da+IPv4off: r->v4.gate)){
+ runlock(ifc);
+ netlog(p->f, Logtrans, "trans: no source ip: %s!%I!%d -> %I!%d\n",
+ p->name, sa, sp, da, dp);
+ return nil;
+ }
+ runlock(ifc);
+
+ /* Check backward route */
+ rh.a = nil;
+ rh.r = nil;
+ if(ipismulticast(da))
+ r = v4lookup(p->f, sa+IPv4off, ia+IPv4off, nil);
else
- return V6;
+ r = v4lookup(p->f, sa+IPv4off, da+IPv4off, &rh);
+ if(r == nil || (r->ifc == ifc && !ifc->reflect)){
+ netlog(p->f, Logtrans, "trans: bad backward route: %s!%I!%d <- %I <- %I!%d\n",
+ p->name, sa, sp, ia, da, dp);
+ return nil;
+ }
+
+ /* Find local port */
+ lport = unusedlport(p);
+ if(lport <= 0){
+ netlog(p->f, Logtrans, "trans: no local port: %s!%I!%d <- %I <- %I!%d\n",
+ p->name, sa, sp, ia, da, dp);
+ return nil;
+ }
+
+ /* Reuse expired entries */
+ num = 0;
+ now = NOW;
+ for(q = p->translations; q != nil; q = q->next) {
+ if(++num >= 1000 || (now - q->time) >= 5*60*1000){
+ netlog(p->f, Logtrans, "trans: removing %s!%I!%d -> %I!%d -> %I!%d\n",
+ p->name,
+ q->forward.raddr, q->forward.rport,
+ q->backward.laddr, q->backward.lport,
+ q->forward.laddr, q->forward.lport);
+
+ iphtrem(ht, &q->forward);
+ iphtrem(ht, &q->backward);
+ break;
+ }
+ }
+ if(q == nil){
+ q = malloc(sizeof(*q));
+ if(q == nil)
+ return nil;
+ q->link = nil;
+ }
+
+ /* Match what needs to be forwarded */
+ q->forward.trans = 1;
+ q->forward.lport = dp;
+ q->forward.rport = sp;
+ ipmove(q->forward.laddr, da);
+ ipmove(q->forward.raddr, sa);
+
+ /* Match what comes back to us */
+ q->backward.trans = 2;
+ q->backward.lport = lport;
+ ipmove(q->backward.laddr, ia);
+ if(p->ipproto == 1 || ipismulticast(da)){
+ q->backward.rport = 0;
+ ipmove(q->backward.raddr, IPnoaddr);
+ } else {
+ q->backward.rport = dp;
+ ipmove(q->backward.raddr, da);
+ }
+ memmove(&q->Routehint, &rh, sizeof(rh));
+
+ netlog(p->f, Logtrans, "trans: adding %s!%I!%d -> %I!%d -> %I!%d\n",
+ p->name,
+ q->forward.raddr, q->forward.rport,
+ q->backward.laddr, q->backward.lport,
+ q->forward.laddr, q->forward.lport);
+
+ iphtadd(ht, &q->forward);
+ iphtadd(ht, &q->backward);
+
+ return transupdate(p, q);
+}
+
+/*
+ * Check if backward translation is valid and
+ * update timestamp.
+ *
+ * Proto is locked.
+ */
+Translation*
+transbackward(Proto *p, Iphash *iph)
+{
+ if(iph == nil || iph->trans != 2)
+ return nil;
+
+ return transupdate(p, iphbackward(iph));
+}
+
+/*
+ * Checksum adjusting hnputs()
+ */
+void
+hnputs_csum(void *p, ushort v, uchar *pcsum)
+{
+ ulong csum;
+
+ assert((((uchar*)p - pcsum) & 1) == 0);
+
+ csum = nhgets(pcsum)^0xFFFF;
+ csum += nhgets(p)^0xFFFF;
+ csum += v;
+ hnputs(p, v);
+ while(v = csum >> 16)
+ csum = (csum & 0xFFFF) + v;
+ hnputs(pcsum, csum^0xFFFF);
}
--- a/sys/src/9/ip/ipifc.c
+++ b/sys/src/9/ip/ipifc.c
@@ -1436,21 +1436,6 @@
}
/*
- * return multicast version if any
- */
-int
-ipismulticast(uchar *ip)
-{
- if(isv4(ip)){
- if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
- return V4;
- }
- else if(ip[0] == 0xff)
- return V6;
- return 0;
-}
-
-/*
* add a multicast address to an interface.
*/
void
--- a/sys/src/9/ip/iproute.c
+++ b/sys/src/9/ip/iproute.c
@@ -875,6 +875,9 @@
case 'p':
if(((type ^= Rptpt) & Rptpt) != Rptpt) return -1;
break;
+ case 't':
+ if(((type ^= Rtrans) & Rtrans) != Rtrans) return -1;
+ break;
case '\0':
return type;
}
@@ -900,6 +903,10 @@
if(type & Rptpt)
*p++ = 'p';
+
+ if(type & Rtrans)
+ *p++ = 't';
+
*p = 0;
}
--- a/sys/src/9/ip/ipv6.c
+++ b/sys/src/9/ip/ipv6.c
@@ -278,7 +278,7 @@
/* don't forward if packet has timed out */
hop = h->ttl;
- if(hop < 1) {
+ if(hop <= 1) {
ip->stats[InHdrErrors]++;
icmpttlexceeded6(f, ifc, bp);
goto drop;
@@ -292,8 +292,7 @@
ip->stats[ForwDatagrams]++;
h = (Ip6hdr*)bp->rp;
tos = (h->vcf[0]&0x0F)<<2 | (h->vcf[1]&0xF0)>>2;
- hop = h->ttl;
- ipoput6(f, bp, 1, hop-1, tos, &rh);
+ ipoput6(f, bp, 1, hop - 1, tos, &rh);
return;
}
--- a/sys/src/9/ip/netlog.c
+++ b/sys/src/9/ip/netlog.c
@@ -51,6 +51,7 @@
{ "udpmsg", Logudp|Logudpmsg, },
{ "ipmsg", Logip|Logipmsg, },
{ "esp", Logesp, },
+ { "trans", Logtrans, },
{ nil, 0, },
};
--- a/sys/src/9/ip/rudp.c
+++ b/sys/src/9/ip/rudp.c
@@ -220,9 +220,10 @@
rudpstartackproc(c->p);
e = Fsstdconnect(c, argv, argc);
Fsconnected(c, e);
+ if(e != nil)
+ return e;
iphtadd(&upriv->ht, c);
-
- return e;
+ return nil;
}
@@ -256,7 +257,6 @@
return e;
Fsconnected(c, nil);
iphtadd(&upriv->ht, c);
-
return nil;
}
@@ -289,10 +289,11 @@
qclose(c->rq);
qclose(c->wq);
qclose(c->eq);
- ipmove(c->laddr, IPnoaddr);
- ipmove(c->raddr, IPnoaddr);
+
c->lport = 0;
+ ipmove(c->laddr, IPnoaddr);
c->rport = 0;
+ ipmove(c->raddr, IPnoaddr);
ucb->headers = 0;
ucb->randdrop = 0;
@@ -460,11 +461,12 @@
void
rudpiput(Proto *rudp, Ipifc *ifc, Block *bp)
{
- int len, olen, ottl;
+ int len, olen;
Udphdr *uh;
+ Iphash *iph;
Conv *c;
Rudpcb *ucb;
- uchar raddr[IPaddrlen], laddr[IPaddrlen];
+ uchar raddr[IPaddrlen], laddr[IPaddrlen], ottl;
ushort rport, lport;
Rudppriv *upriv;
Fs *f;
@@ -503,9 +505,8 @@
}
qlock(rudp);
-
- c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
- if(c == nil){
+ iph = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
+ if(iph == nil){
/* no conversation found */
upriv->ustats.rudpNoPorts++;
qunlock(rudp);
@@ -517,6 +518,7 @@
freeblist(bp);
return;
}
+ c = iphconv(iph);
ucb = (Rudpcb*)c->ptcl;
qlock(ucb);
qunlock(rudp);
--- a/sys/src/9/ip/tcp.c
+++ b/sys/src/9/ip/tcp.c
@@ -126,7 +126,7 @@
uchar length[2]; /* packet length */
uchar id[2]; /* Identification */
uchar frag[2]; /* Fragment information */
- uchar Unused;
+ uchar ttl;
uchar proto;
uchar tcplen[2];
uchar tcpsrc[4];
@@ -1814,9 +1814,7 @@
}
tcpsetstate(new, Established);
-
iphtadd(&tpriv->ht, new);
-
return new;
}
@@ -2068,10 +2066,11 @@
Tcp seg;
Tcp4hdr *h4;
Tcp6hdr *h6;
- int hdrlen;
Tcpctl *tcb;
- ushort length, csum;
+ int hdrlen;
+ ushort length;
uchar source[IPaddrlen], dest[IPaddrlen];
+ Iphash *iph;
Conv *s;
Fs *f;
Tcppriv *tpriv;
@@ -2087,15 +2086,25 @@
h6 = (Tcp6hdr*)(bp->rp);
if((h4->vihl&0xF0)==IP_VER4) {
+ int ttl = h4->ttl;
+
version = V4;
length = nhgets(h4->length);
+ if(length < TCP4_PKT){
+ tpriv->stats[HlenErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "bad tcp len\n");
+ freeblist(bp);
+ return;
+ }
+ length -= TCP4_PKT;
v4tov6(dest, h4->tcpdst);
v4tov6(source, h4->tcpsrc);
- h4->Unused = 0;
- hnputs(h4->tcplen, length-TCP4_PKT);
- if(!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1]) &&
- ptclcsum(bp, TCP4_IPLEN, length-TCP4_IPLEN)) {
+ h4->ttl = 0;
+ hnputs(h4->tcplen, length);
+ if(!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1])
+ && ptclcsum(bp, TCP4_IPLEN, length + TCP4_PKT - TCP4_IPLEN)) {
tpriv->stats[CsumErrs]++;
tpriv->stats[InErrs]++;
netlog(f, Logtcp, "bad tcp proto cksum\n");
@@ -2102,6 +2111,7 @@
freeblist(bp);
return;
}
+ h4->ttl = ttl;
hdrlen = ntohtcp4(&seg, &bp);
if(hdrlen < 0){
@@ -2110,16 +2120,8 @@
netlog(f, Logtcp, "bad tcp hdr len\n");
return;
}
-
- /* trim the packet to the size claimed by the datagram */
- length -= hdrlen+TCP4_PKT;
- bp = trimblock(bp, hdrlen+TCP4_PKT, length);
- if(bp == nil){
- tpriv->stats[LenErrs]++;
- tpriv->stats[InErrs]++;
- netlog(f, Logtcp, "tcp len < 0 after trim\n");
- return;
- }
+ length -= hdrlen;
+ hdrlen += TCP4_PKT;
}
else {
int ttl = h6->ttl;
@@ -2133,13 +2135,13 @@
h6->ploadlen[0] = h6->ploadlen[1] = h6->proto = 0;
h6->ttl = proto;
hnputl(h6->vcf, length);
- if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
- (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) {
+ if((h6->tcpcksum[0] || h6->tcpcksum[1])
+ && ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE) != 0) {
tpriv->stats[CsumErrs]++;
tpriv->stats[InErrs]++;
netlog(f, Logtcp,
- "bad tcpv6 proto cksum: got %#ux, computed %#ux\n",
- h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum);
+ "bad tcpv6 proto cksum: got %#ux\n",
+ h6->tcpcksum[0]<<8 | h6->tcpcksum[1]);
freeblist(bp);
return;
}
@@ -2154,16 +2156,8 @@
netlog(f, Logtcp, "bad tcpv6 hdr len\n");
return;
}
-
- /* trim the packet to the size claimed by the datagram */
length -= hdrlen;
- bp = trimblock(bp, hdrlen+TCP6_PKT, length);
- if(bp == nil){
- tpriv->stats[LenErrs]++;
- tpriv->stats[InErrs]++;
- netlog(f, Logtcp, "tcpv6 len < 0 after trim\n");
- return;
- }
+ hdrlen += TCP6_PKT;
}
/* lock protocol while searching for a conversation */
@@ -2170,8 +2164,8 @@
qlock(tcp);
/* Look for a matching conversation */
- s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
- if(s == nil){
+ iph = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
+ if(iph == nil){
netlog(f, Logtcp, "iphtlook(src %I!%d, dst %I!%d) failed\n",
source, seg.source, dest, seg.dest);
reset:
@@ -2180,7 +2174,31 @@
sndrst(tcp, source, dest, length, &seg, version, "no conversation", nil);
return;
}
+ if(iph->trans){
+ Translation *q;
+ int hop = h4->ttl;
+ if(hop <= 1 || (q = transbackward(tcp, iph)) == nil)
+ goto reset;
+ hnputs_csum(h4->tcpdst+0, nhgets(q->forward.raddr+IPv4off+0), h4->tcpcksum);
+ hnputs_csum(h4->tcpdst+2, nhgets(q->forward.raddr+IPv4off+2), h4->tcpcksum);
+ hnputs_csum(h4->tcpdport, q->forward.rport, h4->tcpcksum);
+ qunlock(tcp);
+ ipoput4(f, bp, 1, hop - 1, h4->tos, q);
+ return;
+ }
+ s = iphconv(iph);
+
+ /* trim off ip and tcp headers */
+ bp = trimblock(bp, hdrlen, length);
+ if(bp == nil){
+ tpriv->stats[LenErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "tcp bad length after header trim off\n");
+ qunlock(tcp);
+ return;
+ }
+
/* if it's a listener, look for the right flags and get a new conv */
tcb = (Tcpctl*)s->ptcl;
if(tcb->state == Listen){
@@ -3200,11 +3218,12 @@
{
Tcp4hdr *h4;
Tcp6hdr *h6;
- Tcpctl *tcb;
uchar source[IPaddrlen];
uchar dest[IPaddrlen];
ushort psource, pdest;
- Conv *s, **p;
+ Iphash *iph;
+ Tcpctl *tcb;
+ Conv *s;
h4 = (Tcp4hdr*)(bp->rp);
h6 = (Tcp6hdr*)(bp->rp);
@@ -3221,33 +3240,73 @@
pdest = nhgets(h6->tcpdport);
}
- /* Look for a connection */
+ /* Look for a connection (source/dest reversed; this is the original packet we sent) */
qlock(tcp);
- for(p = tcp->conv; (s = *p) != nil; p++) {
- tcb = (Tcpctl*)s->ptcl;
- if(s->rport == pdest)
- if(s->lport == psource)
- if(tcb->state != Closed)
- if(ipcmp(s->raddr, dest) == 0)
- if(ipcmp(s->laddr, source) == 0){
- if(s->ignoreadvice)
- break;
- qlock(s);
- qunlock(tcp);
- switch(tcb->state){
- case Syn_sent:
- localclose(s, msg);
- break;
- }
- qunlock(s);
- freeblist(bp);
- return;
- }
+ iph = iphtlook(&((Tcppriv*)tcp->priv)->ht, dest, pdest, source, psource);
+ if(iph == nil)
+ goto raise;
+ if(iph->trans){
+ Translation *q;
+
+ if((q = transbackward(tcp, iph)) == nil)
+ goto raise;
+
+ /* h4->tcplen sits where the ip header checksum is in the embedded ip header */
+ hnputs_csum(h4->tcpsrc+0, nhgets(q->forward.raddr+IPv4off+0), h4->tcplen);
+ hnputs_csum(h4->tcpsrc+2, nhgets(q->forward.raddr+IPv4off+2), h4->tcplen);
+
+ /* don't bother fixing the tcp checksum, packet is most likely truncated */
+ hnputs(h4->tcpsport, q->forward.rport);
+ qunlock(tcp);
+
+ icmpproxyadvice(tcp->f, bp, h4->tcpsrc);
+ return;
}
+ s = iphconv(iph);
+ if(s->ignoreadvice || s->state == Closed)
+ goto raise;
+ qlock(s);
qunlock(tcp);
+ tcb = (Tcpctl*)s->ptcl;
+ if(tcb->state == Syn_sent)
+ localclose(s, msg);
+ qunlock(s);
freeblist(bp);
+ return;
+raise:
+ qunlock(tcp);
+ freeblist(bp);
}
+static Block*
+tcpforward(Proto *tcp, Block *bp, Route *r) /* rewrite source address/port of a forwarded IPv4 tcp packet; returns bp, or nil after freeing it */
+{
+ uchar da[IPaddrlen], sa[IPaddrlen];
+ ushort dp, sp;
+ Tcp4hdr *h4;
+ Translation *q;
+
+ h4 = (Tcp4hdr*)(bp->rp); /* packet is only ever read through the IPv4 header layout here */
+ v4tov6(da, h4->tcpdst);
+ v4tov6(sa, h4->tcpsrc);
+ dp = nhgets(h4->tcpdport);
+ sp = nhgets(h4->tcpsport);
+
+ qlock(tcp); /* protocol lock is held across the lookup and every use of q */
+ q = transforward(tcp, &((Tcppriv*)tcp->priv)->ht, sa, sp, da, dp, r);
+ if(q == nil){ /* no translation obtained: drop the packet */
+ qunlock(tcp);
+ freeblist(bp);
+ return nil;
+ }
+ hnputs_csum(h4->tcpsrc+0, nhgets(q->backward.laddr+IPv4off+0), h4->tcpcksum); /* new source address, 16 bits at a time; presumably also adjusts tcpcksum - confirm in hnputs_csum */
+ hnputs_csum(h4->tcpsrc+2, nhgets(q->backward.laddr+IPv4off+2), h4->tcpcksum);
+ hnputs_csum(h4->tcpsport, q->backward.lport, h4->tcpcksum); /* new source port */
+ qunlock(tcp);
+
+ return bp;
+}
+
static char*
tcpporthogdefensectl(char *val)
{
@@ -3371,6 +3430,7 @@
tcp->close = tcpclose;
tcp->rcv = tcpiput;
tcp->advise = tcpadvise;
+ tcp->forward = tcpforward;
tcp->stats = tcpstats;
tcp->inuse = tcpinuse;
tcp->gc = tcpgc;
--- a/sys/src/9/ip/udp.c
+++ b/sys/src/9/ip/udp.c
@@ -39,7 +39,7 @@
uchar length[2]; /* packet length */
uchar id[2]; /* Identification */
uchar frag[2]; /* Fragment information */
- uchar Unused;
+ uchar ttl; /* Time to live */
uchar udpproto; /* Protocol */
uchar udpplen[2]; /* Header plus data length */
uchar udpsrc[IPv4addrlen]; /* Ip source */
@@ -91,7 +91,6 @@
ulong lenerr; /* short packet */
};
-void (*etherprofiler)(char *name, int qlen);
void udpkick(void *x, Block *bp);
/*
@@ -114,7 +113,6 @@
Fsconnected(c, e);
if(e != nil)
return e;
-
iphtadd(&upriv->ht, c);
return nil;
}
@@ -142,7 +140,6 @@
return e;
Fsconnected(c, nil);
iphtadd(&upriv->ht, c);
-
return nil;
}
@@ -166,10 +163,10 @@
qclose(c->rq);
qclose(c->wq);
qclose(c->eq);
- ipmove(c->laddr, IPnoaddr);
- ipmove(c->raddr, IPnoaddr);
c->lport = 0;
+ ipmove(c->laddr, IPnoaddr);
c->rport = 0;
+ ipmove(c->raddr, IPnoaddr);
ucb = (Udpcb*)c->ptcl;
ucb->headers = 0;
@@ -238,7 +235,7 @@
bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ);
uh4 = (Udp4hdr *)(bp->rp);
ptcllen = dlen + UDP_UDPHDR_SZ;
- uh4->Unused = 0;
+ uh4->ttl = 0;
uh4->udpproto = IP_UDPPROTO;
uh4->frag[0] = 0;
uh4->frag[1] = 0;
@@ -319,6 +316,7 @@
int len;
Udp4hdr *uh4;
Udp6hdr *uh6;
+ Iphash *iph;
Conv *c;
Udpcb *ucb;
uchar raddr[IPaddrlen], laddr[IPaddrlen];
@@ -334,6 +332,7 @@
upriv->ustats.udpInDatagrams++;
uh4 = (Udp4hdr*)(bp->rp);
+ uh6 = (Udp6hdr*)(bp->rp);
version = ((uh4->vihl&0xF0)==IP_VER6) ? V6 : V4;
/* Put back pseudo header for checksum
@@ -340,8 +339,8 @@
* (remember old values for icmpnoconv()) */
switch(version) {
case V4:
- ottl = uh4->Unused;
- uh4->Unused = 0;
+ ottl = uh4->ttl;
+ uh4->ttl = 0;
len = nhgets(uh4->udplen);
olen = nhgets(uh4->udpplen);
hnputs(uh4->udpplen, len);
@@ -360,11 +359,10 @@
return;
}
}
- uh4->Unused = ottl;
+ uh4->ttl = ottl;
hnputs(uh4->udpplen, olen);
break;
case V6:
- uh6 = (Udp6hdr*)(bp->rp);
len = nhgets(uh6->udplen);
oviclfl = nhgetl(uh6->viclfl);
olen = nhgets(uh6->len);
@@ -394,9 +392,8 @@
}
qlock(udp);
-
- c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
- if(c == nil){
+ iph = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
+ if(iph == nil){
/* no conversation found */
upriv->ustats.udpNoPorts++;
qunlock(udp);
@@ -417,6 +414,26 @@
freeblist(bp);
return;
}
+ if(iph->trans){
+ Translation *q;
+ int hop = uh4->ttl;
+ if(hop <= 1 || (q = transbackward(udp, iph)) == nil){
+ qunlock(udp);
+ freeblist(bp);
+ return;
+ }
+ hnputs_csum(uh4->udpdst+0, nhgets(q->forward.raddr+IPv4off+0), uh4->udpcksum);
+ hnputs_csum(uh4->udpdst+2, nhgets(q->forward.raddr+IPv4off+2), uh4->udpcksum);
+ hnputs_csum(uh4->udpdport, q->forward.rport, uh4->udpcksum);
+
+ /* only use route-hint when from original destination */
+ if(memcmp(uh4->udpsrc, q->forward.laddr+IPv4off, IPv4addrlen) != 0)
+ q = nil;
+ qunlock(udp);
+ ipoput4(f, bp, 1, hop - 1, uh4->tos, q);
+ return;
+ }
+ c = iphconv(iph);
ucb = (Udpcb*)c->ptcl;
if(c->state == Announced){
@@ -487,7 +504,6 @@
qpass(c->rq, concatblock(bp));
}
qunlock(c);
-
}
char*
@@ -517,7 +533,8 @@
Udp6hdr *h6;
uchar source[IPaddrlen], dest[IPaddrlen];
ushort psource, pdest;
- Conv *s, **p;
+ Iphash *iph;
+ Conv *s;
h4 = (Udp4hdr*)(bp->rp);
h6 = (Udp6hdr*)(bp->rp);
@@ -534,28 +551,72 @@
pdest = nhgets(h6->udpdport);
}
- /* Look for a connection */
+ /* Look for a connection (source/dest reversed; this is the original packet we sent) */
qlock(udp);
- for(p = udp->conv; (s = *p) != nil; p++) {
- if(s->rport == pdest)
- if(s->lport == psource)
- if(ipcmp(s->raddr, dest) == 0)
- if(ipcmp(s->laddr, source) == 0){
- if(s->ignoreadvice)
- break;
- qlock(s);
- qunlock(udp);
- qhangup(s->rq, msg);
- qhangup(s->wq, msg);
- qunlock(s);
- freeblist(bp);
- return;
- }
+ iph = iphtlook(&((Udppriv*)udp->priv)->ht, dest, pdest, source, psource);
+ if(iph == nil)
+ goto raise;
+ if(iph->trans){
+ Translation *q;
+
+ if((q = transbackward(udp, iph)) == nil)
+ goto raise;
+
+ /* h4->udpplen sits where the ip header checksum is in the embedded ip header */
+ hnputs_csum(h4->udpsrc+0, nhgets(q->forward.raddr+IPv4off+0), h4->udpplen);
+ hnputs_csum(h4->udpsrc+2, nhgets(q->forward.raddr+IPv4off+2), h4->udpplen);
+
+ /* don't bother fixing the udp checksum, packet is most likely truncated */
+ hnputs(h4->udpsport, q->forward.rport);
+ qunlock(udp);
+
+ icmpproxyadvice(udp->f, bp, h4->udpsrc);
+ return;
}
+ s = iphconv(iph);
+ if(s->ignoreadvice)
+ goto raise;
+ qlock(s);
qunlock(udp);
+ qhangup(s->rq, msg);
+ qhangup(s->wq, msg);
+ qunlock(s);
freeblist(bp);
+ return;
+raise:
+ qunlock(udp);
+ freeblist(bp);
}
+Block*
+udpforward(Proto *udp, Block *bp, Route *r) /* rewrite source address/port of a forwarded IPv4 udp packet; returns bp, or nil after freeing it */
+{
+ uchar da[IPaddrlen], sa[IPaddrlen];
+ ushort dp, sp;
+ Udp4hdr *uh4;
+ Translation *q;
+
+ uh4 = (Udp4hdr*)(bp->rp); /* packet is only ever read through the IPv4 header layout here */
+ v4tov6(sa, uh4->udpsrc);
+ v4tov6(da, uh4->udpdst);
+ dp = nhgets(uh4->udpdport);
+ sp = nhgets(uh4->udpsport);
+
+ qlock(udp); /* protocol lock is held across the lookup and every use of q */
+ q = transforward(udp, &((Udppriv*)udp->priv)->ht, sa, sp, da, dp, r);
+ if(q == nil){ /* no translation obtained: drop the packet */
+ qunlock(udp);
+ freeblist(bp);
+ return nil;
+ }
+ hnputs_csum(uh4->udpsrc+0, nhgets(q->backward.laddr+IPv4off+0), uh4->udpcksum); /* new source address, 16 bits at a time; presumably also adjusts udpcksum - confirm in hnputs_csum */
+ hnputs_csum(uh4->udpsrc+2, nhgets(q->backward.laddr+IPv4off+2), uh4->udpcksum);
+ hnputs_csum(uh4->udpsport, q->backward.lport, uh4->udpcksum); /* new source port */
+ qunlock(udp);
+
+ return bp;
+}
+
int
udpstats(Proto *udp, char *buf, int len)
{
@@ -586,6 +647,7 @@
udp->close = udpclose;
udp->rcv = udpiput;
udp->advise = udpadvise;
+ udp->forward = udpforward;
udp->stats = udpstats;
udp->ipproto = IP_UDPPROTO;
udp->nc = Nchans;