ref: 85d6170c351e6fc3ec42e4f59fe465c4e09556ba
parent: 001ce57253f99e1a988a630ddf6bb05815bace80
author: cinap_lenrek <cinap_lenrek@centraldogma>
date: Mon Oct 3 16:40:43 EDT 2011
mothra: ignore http content-type and encoding and just do content sniffing
--- a/sys/src/cmd/mothra/filetype.c
+++ /dev/null
@@ -1,106 +1,0 @@
-#include <u.h>
-#include <libc.h>
-#include <draw.h>
-#include <event.h>
-#include <panel.h>
-#include <ctype.h>
-#include "mothra.h"
-typedef struct Kind Kind;
-struct Kind{
- char *name;
- int kind;
-};
-int klook(char *s, Kind *k){
- while(k->name && cistrcmp(k->name, s)!=0)
- k++;
- return k->kind;
-}
-Kind suffix[]={
- ".html", HTML,
- ".htm", HTML,
- "/", HTML,
- ".gif", GIF,
- ".jpe", JPEG,
- ".jpg", JPEG,
- ".jpeg", JPEG,
- ".png", PNG,
- ".pic", PIC,
- ".au", AUDIO,
- ".tif", TIFF,
- ".tiff", TIFF,
- ".xbm", XBM,
- ".txt", PLAIN,
- ".text", PLAIN,
- ".ai", POSTSCRIPT,
- ".eps", POSTSCRIPT,
- ".ps", POSTSCRIPT,
- ".pdf", PDF,
- ".zip", ZIP,
- 0, HTML
-};
-int suflook(char *s, int len, Kind *k){
- int l;
- while(k->name){
- l=strlen(k->name);
- if(l<=len && cistrcmp(k->name, s+len-l)==0) return k->kind;
- k++;
- }
- return k->kind;
-}
-int suffix2type(char *name){
- int len, kind, restore;
- char *s;
- len=strlen(name);
- if(len>=2 && cistrcmp(name+len-2, ".Z")==0){
- kind=COMPRESS;
- len-=2;
- }
- else if(len>=3 && cistrcmp(name+len-3, ".gz")==0){
- kind=GUNZIP;
- len-=3;
- }
- else
- kind=0;
- restore=name[len];
- name[len]='\0';
- for(s=name+len;s!=name && *s!='.';--s);
- kind|=suflook(name, len, suffix);
- name[len]=restore;
- return kind;
-}
-Kind content[]={
- "text/html", HTML,
- "text/x-html", HTML,
- "application/html", HTML,
- "application/x-html", HTML,
- "text/plain", PLAIN,
- "image/gif", GIF,
- "image/jpeg", JPEG,
- "image/pjpeg", JPEG,
- "image/png", PNG,
- "image/tiff", TIFF,
- "image/x-xbitmap", XBM,
- "image/x-bitmap", XBM,
- "image/xbitmap", XBM,
- "application/postscript", POSTSCRIPT,
- "application/pdf", PDF,
- "application/octet-stream", SUFFIX,
- "application/zip", ZIP,
- 0, SUFFIX
-};
-int content2type(char *s, char *name){
- int type;
- type=klook(s, content);
- if(type==SUFFIX) type=suffix2type(name);
- return type;
-}
-Kind encoding[]={
- "x-compress", COMPRESS,
- "compress", COMPRESS,
- "x-gzip", GUNZIP,
- "gzip", GUNZIP,
- 0, 0
-};
-int encoding2type(char *s){
- return klook(s, encoding);
-}
--- a/sys/src/cmd/mothra/getpix.c
+++ b/sys/src/cmd/mothra/getpix.c
@@ -18,9 +18,7 @@
[GIF] "gif -9t",
[JPEG] "jpg -9t",
[PNG] "png -9t",
-[PIC] "fb/3to1 /lib/fb/cmap/rgbv",
-[TIFF] "/sys/lib/mothra/tiffcvt",
-[XBM] "fb/xbm2pic",
+[BMP] "bmp -9t",
};
void storebitmap(Rtext *t, Image *b){
@@ -34,7 +32,7 @@
Action *ap;
Url url;
Image *b;
- int fd;
+ int fd, typ;
char err[512];
Pix *p;
@@ -56,17 +54,12 @@
close(fd);
return;
}
- if(url.type!=GIF
- && url.type!=JPEG
- && url.type!=PNG
- && url.type!=PIC
- && url.type!=TIFF
- && url.type!=XBM){
+ typ = snooptype(fd);
+ if(typ < 0 || typ >= nelem(pixcmd) || pixcmd[typ] == nil){
werrstr("unknown image type");
goto Err;
}
-
- if((fd = pipeline(pixcmd[url.type], fd)) < 0)
+ if((fd = pipeline(pixcmd[typ], fd)) < 0)
goto Err;
if(ap->width>0 || ap->height>0){
char buf[80];
--- a/sys/src/cmd/mothra/html.h
+++ b/sys/src/cmd/mothra/html.h
@@ -70,7 +70,6 @@
char *etext; /* end of text buffer */
Form *form; /* data for form under construction */
Www *dst; /* where the text goes */
- char charset[NNAME];
};
/*
--- a/sys/src/cmd/mothra/mkfile
+++ b/sys/src/cmd/mothra/mkfile
@@ -3,7 +3,7 @@
TARG=mothra
LIB=libpanel/libpanel.$O.a
CFILES= \
- filetype.c \
+ snoop.c \
forms.c \
getpix.c \
html.syntax.c \
--- a/sys/src/cmd/mothra/mothra.c
+++ b/sys/src/cmd/mothra/mothra.c
@@ -30,7 +30,6 @@
"http://cat-v.org/",
"",
"",
- HTML,
};
Url badurl={
"",
@@ -38,7 +37,6 @@
"No file loaded",
"",
"",
- HTML,
};
Cursor patientcurs={
0, 0,
@@ -816,7 +814,6 @@
memset(url->fullname, 0, sizeof(url->fullname));
strcpy(url->fullname, "file:");
fd2path(fd, url->fullname+5, sizeof(url->fullname)-6);
- url->type = content2type("application/octet-stream", url->fullname);
return fd;
}
@@ -870,20 +867,6 @@
snprint(buf, sizeof buf, "%s/%d/parsed", mtpt, conn);
readstr(url->fullname, sizeof(url->fullname), buf, "url");
readstr(url->tag, sizeof(url->tag), buf, "fragment");
-
- snprint(buf, sizeof buf, "%s/%d", mtpt, conn);
- readstr(buf, sizeof buf, buf, "contenttype");
- url->charset[0] = 0;
- if(p = cistrstr(buf, "charset=")){
- p += 8;
- strncpy(url->charset, p, sizeof(url->charset));
- if(p = strchr(url->charset, ';'))
- *p = 0;
- }
- if(p = strchr(buf, ';'))
- *p = 0;
- url->type = content2type(buf, url->fullname);
-
close(ctlfd);
return fd;
}
@@ -931,9 +914,7 @@
strncpy(url->reltext, urlname, sizeof(url->reltext));
strncpy(url->basename, base, sizeof(url->basename));
url->fullname[0] = 0;
- url->charset[0] = 0;
url->tag[0] = 0;
- url->type = 0;
url->map = 0;
}
Url *copyurl(Url *u){
@@ -951,7 +932,7 @@
* get the file at the given url
*/
void geturl(char *urlname, int method, char *body, int cache, int map){
- int i, fd;
+ int i, fd, typ;
char cmd[NNAME];
int pfd[2];
Www *w;
@@ -968,18 +949,17 @@
break;
}
message("getting %s", selection->fullname);
- if(selection->type&COMPRESS)
- fd=pipeline("/bin/uncompress", fd);
- else if(selection->type&GUNZIP)
+ typ = snooptype(fd);
+ if(typ == GUNZIP){
fd=pipeline("/bin/gunzip", fd);
- switch(selection->type&~COMPRESSION){
+ typ = snooptype(fd);
+ }
+ switch(typ){
default:
- message("Bad type %x in geturl", selection->type);
+ message("Bad type %x in geturl", typ);
break;
case HTML:
- snprint(cmd, sizeof(cmd), selection->charset[0] ?
- "/bin/uhtml -c %s" : "/bin/uhtml", selection->charset);
- fd = pipeline(cmd, fd);
+ fd = pipeline("/bin/uhtml", fd);
case PLAIN:
w = www(i = wwwtop++);
if(i >= NWWW){
@@ -1005,23 +985,17 @@
w->url=copyurl(selection);
w->finished = 0;
w->alldone = 0;
- gettext(w, fd, selection->type&~COMPRESSION);
+ gettext(w, fd, typ);
plinitlist(list, PACKN|FILLX, genwww, 8, doprev);
if(defdisplay) pldraw(list, screen);
setcurrent(i, selection->tag);
break;
- case POSTSCRIPT:
case GIF:
case JPEG:
case PNG:
- case PDF:
+ case BMP:
+ case PAGE:
filter("page -w", fd);
- break;
- case TIFF:
- filter("/sys/lib/mothra/tiffview", fd);
- break;
- case XBM:
- filter("fb/xbm2pic|fb/9v", fd);
break;
}
break;
--- a/sys/src/cmd/mothra/mothra.h
+++ b/sys/src/cmd/mothra/mothra.h
@@ -27,9 +27,7 @@
char basename[NNAME];
char reltext[NNAME];
char tag[NNAME];
- char charset[NNAME];
- int type;
- int map; /* is this an image map? */
+ int map; /* is this an image map? */
};
struct Www{
Url *url;
@@ -43,29 +41,15 @@
int alldone; /* page will not change further -- used to adjust cursor */
};
-/*
- * url reference types -- COMPRESS and GUNZIP are flags that can modify any other type
- * Changing these in a non-downward compatible way spoils cache entries
- */
enum{
- GIF=1,
+ PLAIN,
HTML,
+ GIF,
JPEG,
- PIC,
- TIFF,
- AUDIO,
- PLAIN,
- XBM,
- POSTSCRIPT,
- FORWARD,
- PDF,
- SUFFIX,
- ZIP,
PNG,
-
- COMPRESS=16,
- GUNZIP=32,
- COMPRESSION=16+32,
+ BMP,
+ GUNZIP,
+ PAGE,
};
/*
@@ -102,9 +86,7 @@
void *emallocz(int, int);
void setbitmap(Rtext *);
void message(char *, ...);
-int suffix2type(char *);
-int content2type(char *, char *);
-int encoding2type(char *);
+int snooptype(int fd);
void mkfieldpanel(Rtext *);
void geturl(char *, int, char *, int, int);
char version[];
--- a/sys/src/cmd/mothra/rdhtml.c
+++ b/sys/src/cmd/mothra/rdhtml.c
@@ -609,8 +609,6 @@
dst->title[0]='\0';
g.spacc=0;
g.form=0;
- g.charset[0] = '\0';
- strncpy(g.charset, dst->url->charset, sizeof(g.charset));
for(;;) switch(pl_gettoken(&g)){
case TAG:
--- /dev/null
+++ b/sys/src/cmd/mothra/snoop.c
@@ -1,0 +1,88 @@
+#include <u.h>
+#include <libc.h>
+#include <draw.h>
+#include <event.h>
+#include <panel.h>
+#include <ctype.h>
+#include "mothra.h"
+
+/*
+ * snooptype guesses what kind of data is arriving on fd by reading
+ * up to sizeof(buf)-1 leading bytes and matching them against known
+ * markers.  It returns one of the type codes from mothra.h; PLAIN is
+ * the answer when nothing matches or the initial read fails.
+ *
+ * The sniffed bytes have been consumed from fd, so a child process
+ * is forked to write them, followed by the rest of the stream, into
+ * a pipe whose read end is dup'ed over fd.  The caller keeps reading
+ * from fd as if nothing had been consumed.  If pipe() or rfork()
+ * fails, fd is left alone and the sniffed prefix is lost.
+ *
+ * NOTE(review): the signatures mapped to PAGE, other than the
+ * PDF/PostScript markers, look like zip, troff output, TeX DVI,
+ * OLE2 documents and both TIFF byte orders -- confirm.
+ */
+int
+snooptype(int fd)
+{
+	int pfd[2], typ, n;
+	char buf[1024];
+
+	typ = PLAIN;
+	if((n = readn(fd, buf, sizeof(buf)-1)) < 0)
+		return typ;
+	/*
+	 * Clear everything past the data actually read: this terminates
+	 * the string for cistrstr/strstr and keeps the fixed-length
+	 * memcmp checks from looking at stale stack bytes on short reads.
+	 */
+	memset(buf+n, 0, sizeof(buf)-n);
+	if(cistrstr(buf, "<?xml") ||
+		cistrstr(buf, "<!DOCTYPE") ||
+		cistrstr(buf, "<HTML"))
+		typ = HTML;
+	else if(memcmp(buf, "\x1F\x8B", 2) == 0)
+		typ = GUNZIP;
+	else if(memcmp(buf, "\377\330\377", 3) == 0)
+		typ = JPEG;
+	else if(memcmp(buf, "\211PNG\r\n\032\n", 8) == 0)	/* whole 8-byte signature */
+		typ = PNG;
+	else if(memcmp(buf, "GIF", 3) == 0)
+		typ = GIF;
+	else if(memcmp(buf, "BM", 2) == 0)
+		typ = BMP;
+	else if(memcmp(buf, "PK\x03\x04", 4) == 0)
+		typ = PAGE;
+	else if(memcmp(buf, "%PDF-", 5) == 0 || strstr(buf, "%!"))
+		typ = PAGE;
+	else if(memcmp(buf, "x T ", 4) == 0)
+		typ = PAGE;
+	else if(memcmp(buf, "\xF7\x02\x01\x83\x92\xC0\x1C;", 8) == 0)
+		typ = PAGE;
+	else if(memcmp(buf, "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", 8) == 0)
+		typ = PAGE;
+	else if(memcmp(buf, "\111\111\052\000", 4) == 0)
+		typ = PAGE;
+	else if(memcmp(buf, "\115\115\000\052", 4) == 0)
+		typ = PAGE;
+	if(pipe(pfd) >= 0){
+		switch(rfork(RFFDG|RFPROC|RFNOWAIT)){
+		case -1:
+			break;
+		case 0:
+			/* child: replay the sniffed bytes, then copy the remainder */
+			close(pfd[0]);
+			do {
+				if(write(pfd[1], buf, n) != n)
+					break;
+			} while((n = read(fd, buf, sizeof(buf))) > 0);
+			exits(nil);
+		default:
+			/* parent: from here on fd reads from the pipe */
+			dup(pfd[0], fd);
+		}
+		close(pfd[1]);
+		close(pfd[0]);
+	}
+	return typ;
+}