ref: f2bd1de5bdba2449f01085984483702adb833fea
parent: 3720b5ab9c4cb485c64e83d8af740aea3680123b
author: cinap_lenrek <[email protected]>
date: Sun Nov 24 06:56:33 EST 2013
webfs: support for internationalized domain name urls
--- a/sys/src/cmd/webfs/dat.h
+++ b/sys/src/cmd/webfs/dat.h
@@ -68,3 +68,7 @@
Url *proxy;
int timeout;
char *whitespace;
+
+enum {
+ Domlen = 256, /* size of the host name buffers and of the per-label rune arrays used by the idn conversion */
+};
--- a/sys/src/cmd/webfs/fns.h
+++ b/sys/src/cmd/webfs/fns.h
@@ -16,6 +16,7 @@
#pragma varargck type "E" Str2
int Efmt(Fmt*);
+int Hfmt(Fmt*);
int Ufmt(Fmt*);
char* Upath(Url *);
Url* url(char *s, Url *b);
@@ -22,6 +23,10 @@
Url* saneurl(Url *u);
int matchurl(Url *u, Url *s);
void freeurl(Url *u);
+
+/* idn */
+char* idn2utf(char *name, char *buf, int nbuf);
+char* utf2idn(char *name, char *buf, int nbuf);
/* buq */
int buread(Buq *q, void *v, int l);
--- a/sys/src/cmd/webfs/fs.c
+++ b/sys/src/cmd/webfs/fs.c
@@ -765,6 +765,7 @@
quotefmtinstall();
fmtinstall('U', Ufmt);
fmtinstall('E', Efmt);
+ fmtinstall('H', Hfmt);
srv = nil;
mtpt = "/mnt/web";
--- a/sys/src/cmd/webfs/http.c
+++ b/sys/src/cmd/webfs/http.c
@@ -573,7 +573,7 @@
ru.path = Upath(u);
ru.query = u->query;
}
- n = snprint(buf, sizeof(buf), "%s %U HTTP/1.1\r\nHost: %s%s%s\r\n",
+ n = snprint(buf, sizeof(buf), "%s %U HTTP/1.1\r\nHost: %H%s%s\r\n",
method, &ru, u->host, u->port ? ":" : "", u->port ? u->port : "");
if(n >= sizeof(buf)-64){
werrstr("request too large");
--- /dev/null
+++ b/sys/src/cmd/webfs/idn.c
@@ -1,0 +1,267 @@
+#include <u.h>
+#include <libc.h>
+#include <ctype.h>
+#include <fcall.h>
+#include <thread.h>
+#include <9p.h>
+
+#include "dat.h"
+#include "fns.h"
+
+enum { /* punycode parameters; the values are those of RFC 3492 */
+ base = 36, /* number of digit values: letters are 0-25, '0'-'9' are 26-35 */
+ tmin = 1, /* lower clamp of the per digit threshold t */
+ tmax = 26, /* upper clamp of the per digit threshold t */
+ skew = 38, /* used by adapt() when computing the new bias */
+ damp = 700, /* divisor applied to the first delta in adapt() */
+ initial_bias = 72, /* bias before any rune was coded */
+ initial_n = 0x80, /* runes below this value are copied, not encoded */
+};
+
+static uint maxint = ~0; /* all bits set; used by the overflow checks */
+
+/*
+ * map a character of the encoded input to its digit value:
+ * letters of either case are 0-25, '0'-'9' are 26-35.
+ * any other character yields base, which is not a valid digit.
+ */
+static uint
+decode_digit(uint cp)
+{
+	if(cp >= '0' && cp <= '9')
+		return cp - '0' + 26;
+	if(cp >= 'A' && cp <= 'Z')
+		return cp - 'A';
+	if(cp >= 'a' && cp <= 'z')
+		return cp - 'a';
+	return base;
+}
+
+/*
+ * map a digit value to its character: 0-25 become letters
+ * (upper case when flag is set, lower case otherwise) and
+ * 26-35 become '0'-'9'.
+ */
+static char
+encode_digit(uint d, int flag)
+{
+	char first;
+
+	if(d >= 26)
+		return '0' + (d - 26);
+	first = flag ? 'A' : 'a';
+	return first + d;
+}
+
+/*
+ * compute the bias to use for the next delta.
+ * delta is divided by damp the first time and halved afterwards,
+ * then increased by delta / numpoints and scaled down in steps
+ * of base - tmin until it is at most ((base - tmin) * tmax) / 2.
+ * numpoints must not be zero.
+ */
+static uint
+adapt(uint delta, uint numpoints, int firsttime)
+{
+	uint k;
+
+	if(firsttime)
+		delta /= damp;
+	else
+		delta >>= 1;
+	delta += delta / numpoints;
+	k = 0;
+	while(delta > ((base - tmin) * tmax) / 2){
+		delta /= base - tmin;
+		k += base;
+	}
+	return k + (base - tmin + 1) * delta / (delta + skew);
+}
+/*
+ * encode the input_length runes in input as punycode.
+ * at most max_out bytes are stored in output and no
+ * terminating NUL is written.  returns the number of bytes
+ * stored, or -1 if the output does not fit or delta would
+ * overflow.  the case of the last digit of every encoded
+ * rune records isupperrune() of that rune.
+ */
+static int
+punyencode(uint input_length, Rune input[], uint max_out, char output[])
+{
+ uint n, delta, h, b, out, bias, j, m, q, k, t;
+
+ n = initial_n;
+ delta = out = 0;
+ bias = initial_bias;
+ /* copy the runes below 0x80 to the output unchanged */
+ for (j = 0; j < input_length; ++j) {
+ if ((uint)input[j] < 0x80) {
+ if (max_out - out < 2) /* keeps one byte free for the '-' stored below */
+ return -1;
+ output[out++] = input[j];
+ }
+ }
+ /* h: runes handled so far, b: number of runes copied above */
+ h = b = out;
+ /* the '-' delimiter is only stored when something was copied */
+ if (b > 0)
+ output[out++] = '-';
+ /* one pass per distinct remaining rune value, in increasing order */
+ while (h < input_length) {
+ for (m = maxint, j = 0; j < input_length; ++j) { /* m: smallest rune >= n */
+ if (input[j] >= n && input[j] < m)
+ m = input[j];
+ }
+ /* fail if adding (m - n) * (h + 1) to delta would overflow */
+ if (m - n > (maxint - delta) / (h + 1))
+ return -1;
+
+ delta += (m - n) * (h + 1);
+ n = m;
+
+ for (j = 0; j < input_length; ++j) {
+ if (input[j] < n) {
+ if (++delta == 0) /* delta wrapped around */
+ return -1;
+ }
+
+ if (input[j] == n) {
+ for (q = delta, k = base;; k += base) { /* store delta as digits, lowest first */
+ if (out >= max_out) /* also covers the last digit stored after the loop */
+ return -1;
+ if (k <= bias)
+ t = tmin;
+ else if (k >= bias + tmax)
+ t = tmax;
+ else
+ t = k - bias;
+ if (q < t)
+ break;
+ output[out++] = encode_digit(t + (q - t) % (base - t), 0);
+ q = (q - t) / (base - t);
+ }
+ output[out++] = encode_digit(q, isupperrune(input[j])); /* last digit carries the case */
+ bias = adapt(delta, h + 1, h == b);
+ delta = 0;
+ ++h;
+ }
+ }
+
+ ++delta, ++n;
+ }
+
+ return (int)out;
+}
+/*
+ * decode the input_length bytes of punycode in input.
+ * at most max_out runes are stored in output.  returns the
+ * number of runes stored, or -1 if the input is not valid,
+ * a value would overflow or the output does not fit.
+ * a rune is stored with toupperrune() when the last digit
+ * that encoded it is an upper case letter and with
+ * tolowerrune() otherwise.
+ */
+static int
+punydecode(uint input_length, char input[], uint max_out, Rune output[])
+{
+ uint n, out, i, bias, b, j, in, oldi, w, k, digit, t;
+
+ n = initial_n;
+ out = i = 0;
+ bias = initial_bias;
+ /* b: index of the last '-' in the input, 0 if there is none */
+ for (b = j = 0; j < input_length; ++j)
+ if (input[j] == '-')
+ b = j;
+
+ if (b > max_out)
+ return -1;
+ /* the bytes before that '-' are copied; bytes with the high bit set are rejected */
+ for (j = 0; j < b; ++j) {
+ if (input[j] & 0x80)
+ return -1;
+ output[out++] = input[j];
+ }
+ /* decode the digits after the '-'; every pass inserts one rune */
+ for (in = b > 0 ? b + 1 : 0; in < input_length; ++out) {
+ for (oldi = i, w = 1, k = base;; k += base) { /* add the next number to i, lowest digit first */
+ if (in >= input_length)
+ return -1;
+ digit = decode_digit(input[in++]);
+ if (digit >= base)
+ return -1;
+ if (digit > (maxint - i) / w) /* i += digit * w would overflow */
+ return -1;
+ i += digit * w;
+ if (k <= bias)
+ t = tmin;
+ else if (k >= bias + tmax)
+ t = tmax;
+ else
+ t = k - bias;
+ if (digit < t)
+ break;
+ if (w > maxint / (base - t)) /* w *= (base - t) would overflow */
+ return -1;
+ w *= (base - t);
+ }
+
+ bias = adapt(i - oldi, out + 1, oldi == 0);
+ /* i / (out + 1) advances n, the remainder is the position of the new rune */
+ if (i / (out + 1) > maxint - n)
+ return -1;
+ n += i / (out + 1);
+ i %= (out + 1);
+
+ if (out >= max_out)
+ return -1;
+ /* move the runes from position i up by one and store the new rune there */
+ memmove(output + i + 1, output + i, (out - i) * sizeof *output);
+ if(((uint)input[in-1] - 'A') < 26) /* last digit read was 'A'-'Z' */
+ output[i++] = toupperrune(n);
+ else
+ output[i++] = tolowerrune(n);
+ }
+
+ return (int)out;
+}
+
+/*
+ * convert punycode encoded internationalized
+ * domain name to unicode string.
+ *
+ * name is split at '.'; a label that starts with "xn--"
+ * (in any case) is decoded with punydecode(), all other
+ * labels are copied.  the NUL terminated result is stored
+ * in buf, which has room for nbuf bytes.
+ * returns buf, or nil if a label has more than Domlen runes,
+ * cannot be decoded or the result does not fit into buf.
+ */
+char*
+idn2utf(char *name, char *buf, int nbuf)
+{
+	char *dp, *de, *cp;
+	Rune rb[Domlen], r;
+	int nc, nr, n;
+
+	cp = name;
+	dp = buf;
+	de = dp+nbuf-1;
+	for(;;){
+		nc = nr = 0;
+		while(cp[nc] != 0){
+			n = chartorune(&r, cp+nc);
+			if(r == '.')
+				break;
+			/* the name comes from the url: never store past the end of rb */
+			if(nr >= nelem(rb))
+				return nil;
+			rb[nr++] = r;
+			nc += n;
+		}
+		if(cistrncmp(cp, "xn--", 4) == 0)
+			if((nr = punydecode(nc-4, cp+4, nelem(rb), rb)) < 0)
+				return nil;
+		/*
+		 * seprint() truncates silently and cannot be called
+		 * with dp at or past de, so make sure the label and
+		 * the NUL seprint() stores fit before formatting.
+		 */
+		if(runenlen(rb, nr) >= de - dp)
+			return nil;
+		dp = seprint(dp, de, "%.*S", nr, rb);
+		if(cp[nc] == 0)
+			break;
+		*dp++ = '.';
+		cp += nc+1;
+	}
+	*dp = 0;
+	return buf;
+}
+
+/*
+ * convert unicode string to punycode
+ * encoded internationalized domain name.
+ *
+ * name is split at '.'; a label made of single byte
+ * characters only is copied, any other label is encoded
+ * with punyencode() and prefixed with "xn--".  the NUL
+ * terminated result is stored in buf, which has room for
+ * nbuf bytes.
+ * returns buf, or nil if a label has more than Domlen runes,
+ * cannot be encoded or the result does not fit into buf.
+ */
+char*
+utf2idn(char *name, char *buf, int nbuf)
+{
+	char *dp, *de, *cp;
+	Rune rb[Domlen], r;
+	int nc, nr, n;
+
+	dp = buf;
+	de = dp+nbuf-1;
+	cp = name;
+	for(;;){
+		nc = nr = 0;
+		while(cp[nc] != 0){
+			n = chartorune(&r, cp+nc);
+			if(r == '.')
+				break;
+			/* fail instead of cutting a long label in the middle */
+			if(nr >= nelem(rb))
+				return nil;
+			rb[nr++] = r;
+			nc += n;
+		}
+		if(nc == nr){
+			/* seprint() truncates silently: check the space first */
+			if(nc >= de - dp)
+				return nil;
+			dp = seprint(dp, de, "%.*s", nc, cp);
+		} else {
+			/* room for "xn--" and at least one encoded byte */
+			if(de - dp <= 4)
+				return nil;
+			dp = seprint(dp, de, "xn--");
+			if((n = punyencode(nr, rb, de - dp, dp)) < 0)
+				return nil;
+			dp += n;
+			if(dp >= de)
+				return nil;
+		}
+		if(cp[nc] == 0)
+			break;
+		*dp++ = '.';
+		cp += nc+1;
+	}
+	*dp = 0;
+	return buf;
+}
+
--- a/sys/src/cmd/webfs/mkfile
+++ b/sys/src/cmd/webfs/mkfile
@@ -3,6 +3,6 @@
TARG=webfs
HFILES=fns.h dat.h
-OFILES=sub.$O url.$O buq.$O http.$O fs.$O
+OFILES=sub.$O url.$O buq.$O http.$O fs.$O idn.$O
</sys/src/cmd/mkone
--- a/sys/src/cmd/webfs/url.c
+++ b/sys/src/cmd/webfs/url.c
@@ -69,6 +69,21 @@
}
+/*
+ * format routine for a host name (char*) argument:
+ * prints the name converted with utf2idn(), or the
+ * name as it is when the conversion fails.
+ */
int
+Hfmt(Fmt *f)
+{
+	char *buf, *s;
+
+	s = va_arg(f->args, char*);
+	buf = emalloc(Domlen);
+	/* buf is kept in its own variable so it is freed on failure too */
+	fmtprint(f, "%s", utf2idn(s, buf, Domlen) != nil ? buf : s);
+	free(buf);
+	return 0;
+}
+
+int
Ufmt(Fmt *f)
{
char *s;
@@ -87,7 +102,7 @@
fmtprint(f, "@");
}
if(u->host){
- fmtprint(f, strchr(u->host, ':') ? "[%s]" : "%s", u->host);
+ fmtprint(f, strchr(u->host, ':') ? "[%s]" : "%H", u->host);
if(u->port)
fmtprint(f, ":%s", u->port);
}
@@ -184,12 +199,17 @@
+/*
+ * convert the utf string s to lower case in place and return s;
+ * nil is returned unchanged.  a character is only replaced when
+ * its lower case form occupies the same number of bytes, so the
+ * string never changes length.
+ */
static char*
mklowcase(char *s)
{
- char *p;
-
+	char *cp;
+	Rune r, l;
+	int n;
+
if(s == nil)
return s;
- for(p = s; *p; p++)
- *p = tolower(*p);
+	cp = s;
+	while(*cp != 0){
+		n = chartorune(&r, cp);
+		l = tolowerrune(r);
+		/*
+		 * an invalid byte decodes to Runeerror with n == 1 and
+		 * some runes change their encoded length in lower case;
+		 * storing those would overwrite the bytes that follow.
+		 */
+		if(l != r && runelen(l) == n)
+			runetochar(cp, &l);
+		cp += n;
+	}
return s;
}
@@ -298,6 +318,15 @@
if(s = u->query)
while(s = strchr(s, '+'))
*s++ = ' ';
+
+ if(s = u->host){
+ t = emalloc(Domlen);
+ if(idn2utf(s, t, Domlen)){
+ u->host = estrdup(t);
+ free(s);
+ }
+ free(t);
+ }
unescape(u->user, "");
unescape(u->pass, "");