shithub: riscv

Download patch

ref: f2bd1de5bdba2449f01085984483702adb833fea
parent: 3720b5ab9c4cb485c64e83d8af740aea3680123b
author: cinap_lenrek <[email protected]>
date: Sun Nov 24 06:56:33 EST 2013

webfs: support for internationalized domain name urls

--- a/sys/src/cmd/webfs/dat.h
+++ b/sys/src/cmd/webfs/dat.h
@@ -68,3 +68,7 @@
 Url	*proxy;
 int	timeout;
 char	*whitespace;
+
+enum {
+	Domlen = 256,	/* size of the buffers used to convert domain names */
+};
--- a/sys/src/cmd/webfs/fns.h
+++ b/sys/src/cmd/webfs/fns.h
@@ -16,6 +16,7 @@
 #pragma varargck type "E" Str2
 
 int	Efmt(Fmt*);
+int	Hfmt(Fmt*);
 int	Ufmt(Fmt*);
 char*	Upath(Url *);
 Url*	url(char *s, Url *b);
@@ -22,6 +23,10 @@
 Url*	saneurl(Url *u);
 int	matchurl(Url *u, Url *s);
 void	freeurl(Url *u);
+
+/* idn: convert domain names between utf-8 and punycode; both return buf, or nil on failure */
+char*	idn2utf(char *name, char *buf, int nbuf);
+char*	utf2idn(char *name, char *buf, int nbuf);
 
 /* buq */
 int	buread(Buq *q, void *v, int l);
--- a/sys/src/cmd/webfs/fs.c
+++ b/sys/src/cmd/webfs/fs.c
@@ -765,6 +765,7 @@
 	quotefmtinstall();
 	fmtinstall('U', Ufmt);
 	fmtinstall('E', Efmt);
+	fmtinstall('H', Hfmt);
 
 	srv = nil;
 	mtpt = "/mnt/web";
--- a/sys/src/cmd/webfs/http.c
+++ b/sys/src/cmd/webfs/http.c
@@ -573,7 +573,7 @@
 			ru.path = Upath(u);
 			ru.query = u->query;
 		}
-		n = snprint(buf, sizeof(buf), "%s %U HTTP/1.1\r\nHost: %s%s%s\r\n",
+		n = snprint(buf, sizeof(buf), "%s %U HTTP/1.1\r\nHost: %H%s%s\r\n",
 			method, &ru, u->host, u->port ? ":" : "", u->port ? u->port : "");
 		if(n >= sizeof(buf)-64){
 			werrstr("request too large");
--- /dev/null
+++ b/sys/src/cmd/webfs/idn.c
@@ -1,0 +1,267 @@
+#include <u.h>
+#include <libc.h>
+#include <ctype.h>
+#include <fcall.h>
+#include <thread.h>
+#include <9p.h>
+
+#include "dat.h"
+#include "fns.h"
+
+enum {
+	base = 36,	/* number of digit values: letters are 0-25, '0'-'9' are 26-35 */
+	tmin = 1,	/* smallest digit threshold */
+	tmax = 26,	/* largest digit threshold */
+	skew = 38,	/* used by adapt() when computing a new bias */
+	damp = 700,	/* divisor adapt() applies to the first delta */
+	initial_bias = 72,	/* starting value of bias */
+	initial_n = 0x80,	/* first code point that is not copied literally */
+};
+
+static uint maxint = ~0;	/* largest uint; used for the overflow checks */
+
+static uint
+decode_digit(uint cp)	/* digit value of character cp; base when cp is not a digit */
+{
+	if(cp >= 'a' && cp <= 'z')
+		return cp - 'a';
+	if(cp >= 'A' && cp <= 'Z')
+		return cp - 'A';
+	if(cp >= '0' && cp <= '9')
+		return 26 + (cp - '0');
+	return base;
+}
+
+static char
+encode_digit(uint d, int flag)	/* character for digit d; letters are upper case when flag is set */
+{
+	if(d >= 26)
+		return '0' + (d - 26);
+	return (flag ? 'A' : 'a') + d;
+}
+
+static uint
+adapt(uint delta, uint numpoints, int firsttime)	/* returns the bias to use for the next delta */
+{
+	uint k;
+
+	delta = firsttime ? delta / damp : delta >> 1;	/* scale down: by damp the first time, else halve */
+	delta += delta / numpoints;
+	for (k = 0; delta > ((base - tmin) * tmax) / 2; k += base)	/* k grows by base for every division */
+		delta /= base - tmin;
+	return k + (base - tmin + 1) * delta / (delta + skew);
+}
+
+static int
+punyencode(uint input_length, Rune input[], uint max_out, char output[])	/* returns bytes written to output (not NUL terminated), -1 on failure */
+{
+	uint n, delta, h, b, out, bias, j, m, q, k, t;
+
+	n = initial_n;
+	delta = out = 0;
+	bias = initial_bias;
+
+	for (j = 0;  j < input_length;  ++j) {	/* copy the code points below 0x80 as they are */
+		if ((uint)input[j] < 0x80) {
+			if (max_out - out < 2)
+				return -1;
+			output[out++] = input[j];
+		}
+	}
+
+	h = b = out;	/* h: code points handled so far, b: how many of them were copied above */
+
+	if (b > 0)
+		output[out++] = '-';	/* separates the copied part from the encoded digits */
+
+	while (h < input_length) {
+		for (m = maxint, j = 0; j < input_length; ++j) {	/* m: smallest code point >= n */
+			if (input[j] >= n && input[j] < m)
+				m = input[j];
+		}
+
+		if (m - n > (maxint - delta) / (h + 1))	/* delta would overflow below */
+			return -1;
+
+		delta += (m - n) * (h + 1);
+		n = m;
+
+		for (j = 0;  j < input_length;  ++j) {
+			if (input[j] < n) {
+				if (++delta == 0)	/* wrapped around */
+					return -1;
+			}
+
+			if (input[j] == n) {
+				for (q = delta, k = base;; k += base) {	/* emit delta as variable length digits */
+					if (out >= max_out)
+						return -1;
+					if (k <= bias)	/* t: threshold, clamped to tmin..tmax */
+						t = tmin;
+					else if (k >= bias + tmax)
+						t = tmax;
+					else
+						t = k - bias;
+					if (q < t)
+						break;
+					output[out++] = encode_digit(t + (q - t) % (base - t), 0);
+					q = (q - t) / (base - t);
+				}
+				output[out++] = encode_digit(q, isupperrune(input[j]));	/* last digit; an upper case letter if the rune is upper case */
+				bias = adapt(delta, h + 1, h == b);
+				delta = 0;
+				++h;
+			}
+		}
+
+		++delta, ++n;
+	}
+
+	return (int)out;
+}
+
+static int
+punydecode(uint input_length, char input[], uint max_out, Rune output[])	/* returns runes stored in output, -1 on failure */
+{
+	uint n, out, i, bias, b, j, in, oldi, w, k, digit, t;
+
+	n = initial_n;
+	out = i = 0;
+	bias = initial_bias;
+
+	for (b = j = 0; j < input_length; ++j)	/* b: index of the last '-', 0 if there is none */
+		if (input[j] == '-')
+			b = j;
+
+	if (b > max_out)
+		return -1;
+
+	for (j = 0;  j < b;  ++j) {	/* everything before the last '-' is copied as it is */
+		if (input[j] & 0x80)	/* must be ascii */
+			return -1;
+		output[out++] = input[j];
+	}
+
+	for (in = b > 0 ? b + 1 : 0; in < input_length; ++out) {	/* one rune is inserted per iteration */
+		for (oldi = i, w = 1, k = base;; k += base) {	/* add the next variable length number to i */
+			if (in >= input_length)
+				return -1;
+			digit = decode_digit(input[in++]);
+			if (digit >= base)
+				return -1;
+			if (digit > (maxint - i) / w)	/* i would overflow */
+				return -1;
+			i += digit * w;
+			if (k <= bias)
+				t = tmin;
+			else if (k >= bias + tmax)
+				t = tmax;
+			else
+				t = k - bias;
+			if (digit < t)
+				break;
+			if (w > maxint / (base - t))	/* w would overflow */
+				return -1;
+			w *= (base - t);
+		}
+
+		bias = adapt(i - oldi, out + 1, oldi == 0);
+
+		if (i / (out + 1) > maxint - n)	/* n would overflow */
+			return -1;
+		n += i / (out + 1);	/* n: code point to insert */
+		i %= (out + 1);	/* i: position in output where it goes */
+
+		if (out >= max_out)
+			return -1;
+
+		memmove(output + i + 1, output + i, (out - i) * sizeof *output);	/* make room at i */
+		if(((uint)input[in-1] - 'A') < 26)	/* case of the last digit selects the case of the rune */
+			output[i++] = toupperrune(n);
+		else
+			output[i++] = tolowerrune(n);
+	}
+
+	return (int)out;
+}
+
+/*
+ * convert punycode encoded internationalized domain name to utf-8 in buf[nbuf];
+ * returns buf, or nil when a label is invalid, too long or buf is too small.
+ */
+char*
+idn2utf(char *name, char *buf, int nbuf)
+{
+	char *dp, *de, *cp;
+	Rune rb[Domlen], r;
+	int nc, nr, n;
+
+	dp = buf;
+	de = dp+nbuf-1;	/* last byte of buf */
+	for(cp = name;; cp += nc+1){	/* one dot separated label per iteration */
+		nc = nr = 0;
+		while(cp[nc] != 0){
+			n = chartorune(&r, cp+nc);
+			if(r == '.')
+				break;
+			if(nr >= nelem(rb))	/* label has more runes than rb can hold */
+				return nil;
+			rb[nr++] = r;
+			nc += n;
+		}
+		if(cistrncmp(cp, "xn--", 4) == 0)
+			if((nr = punydecode(nc-4, cp+4, nelem(rb), rb)) < 0)
+				return nil;
+		dp = seprint(dp, de+1, "%.*S", nr, rb);	/* end is de+1 so a full buffer shows up as dp == de */
+		if(dp == nil || dp >= de)	/* no room left for '.' or the final NUL */
+			return nil;
+		if(cp[nc] == 0)
+			break;
+		*dp++ = '.';
+	}
+	*dp = 0;
+	return buf;
+}
+
+/*
+ * convert utf-8 domain name to punycode encoded internationalized domain name
+ * in buf[nbuf]; returns buf, or nil when a label is too long or buf is too small.
+ */
+char*
+utf2idn(char *name, char *buf, int nbuf)
+{
+	char *dp, *de, *cp;
+	Rune rb[Domlen], r;
+	int nc, nr, n;
+
+	dp = buf;
+	de = dp+nbuf-1;	/* last byte of buf */
+	for(cp = name;; cp += nc+1){	/* one dot separated label per iteration */
+		nc = nr = 0;
+		while(cp[nc] != 0){
+			n = chartorune(&r, cp+nc);
+			if(r == '.')
+				break;
+			if(nr >= nelem(rb))	/* fail instead of cutting the label in two */
+				return nil;
+			rb[nr++] = r;
+			nc += n;
+		}
+		if(nc == nr)	/* as many bytes as runes: copy the label as it is */
+			dp = seprint(dp, de+1, "%.*s", nc, cp);
+		else {
+			dp = seprint(dp, de+1, "xn--");	/* end is de+1 so a full buffer shows up as dp == de */
+			if(dp == nil || (n = punyencode(nr, rb, de - dp, dp)) < 0)
+				return nil;
+			dp += n;
+		}
+		if(dp == nil || dp >= de)	/* no room left for '.' or the final NUL */
+			return nil;
+		if(cp[nc] == 0)
+			break;
+		*dp++ = '.';
+	}
+	*dp = 0;
+	return buf;
+}
+
--- a/sys/src/cmd/webfs/mkfile
+++ b/sys/src/cmd/webfs/mkfile
@@ -3,6 +3,6 @@
 TARG=webfs
 
 HFILES=fns.h dat.h
-OFILES=sub.$O url.$O buq.$O http.$O fs.$O
+OFILES=sub.$O url.$O buq.$O http.$O fs.$O idn.$O
 
 </sys/src/cmd/mkone
--- a/sys/src/cmd/webfs/url.c
+++ b/sys/src/cmd/webfs/url.c
@@ -69,6 +69,21 @@
 }
 
 int
+Hfmt(Fmt *f)
+{
+	char *d, *s;
+
+	/* %H: print host name s in punycode form, or as it is when it cannot be converted */
+	s = va_arg(f->args, char*);
+	d = emalloc(Domlen);
+	if(utf2idn(s, d, Domlen) != nil)
+		s = d;
+	fmtprint(f, "%s", s);
+	free(d);	/* always release the buffer, also when the conversion failed */
+	return 0;
+}
+
+int
 Ufmt(Fmt *f)
 {
 	char *s;
@@ -87,7 +102,7 @@
 		fmtprint(f, "@");
 	}
 	if(u->host){
-		fmtprint(f, strchr(u->host, ':') ? "[%s]" : "%s", u->host);
+		fmtprint(f, strchr(u->host, ':') ? "[%s]" : "%H", u->host);
 		if(u->port)
 			fmtprint(f, ":%s", u->port);
 	}
@@ -184,12 +199,17 @@
 static char*
 mklowcase(char *s)
 {
-	char *p;
-
+	char *cp;
+	Rune r;
+	
 	if(s == nil)
 		return s;
-	for(p = s; *p; p++)
-		*p = tolower(*p);
+	cp = s;
+	while(*cp != 0){
+		chartorune(&r, cp);
+		r = tolowerrune(r);
+		cp += runetochar(cp, &r);
+	}
 	return s;
 }
 
@@ -298,6 +318,15 @@
 	if(s = u->query)
 		while(s = strchr(s, '+'))
 			*s++ = ' ';
+
+	if(s = u->host){
+		t = emalloc(Domlen);
+		if(idn2utf(s, t, Domlen)){
+			u->host = estrdup(t);
+			free(s);
+		}
+		free(t);
+	}
 
 	unescape(u->user, "");
 	unescape(u->pass, "");