ref: 679b092ee02429b444b3e8995f6db11b42008dad
parent: 66f76c28212d3a25d4b039de2ce817fc74c5ca1e
author: cinap_lenrek <[email protected]>
date: Sun May 11 22:38:53 EDT 2014
htmlfmt: use uhtml for character set conversion
--- a/sys/src/cmd/htmlfmt/dat.h
+++ b/sys/src/cmd/htmlfmt/dat.h
@@ -28,12 +28,10 @@
extern char* url;
extern int aflag;
extern int width;
-extern int defcharset;
extern char* loadhtml(int);
extern char* readfile(char*, char*, int*);
-extern int charset(char*);
extern void* emalloc(ulong);
extern char* estrdup(char*);
extern char* estrstrdup(char*, char*);
--- a/sys/src/cmd/htmlfmt/html.c
+++ b/sys/src/cmd/htmlfmt/html.c
@@ -285,40 +285,13 @@
free(t);
}
-/*
- * Somewhat of a hack. Not a full parse, just looks for strings in the beginning
- * of the document (cistrstr only looks at first somewhat bytes).
- */
-int
-charset(char *s)
-{
- char *meta, *emeta, *charset;
-
- if(defcharset == 0)
- defcharset = ISO_8859_1;
- meta = cistrstr(s, "<meta");
- if(meta == nil)
- return defcharset;
- for(emeta=meta; *emeta!='>' && *emeta!='\0'; emeta++)
- ;
- charset = cistrstr(s, "charset=");
- if(charset == nil)
- return defcharset;
- charset += 8;
- if(*charset == '"')
- charset++;
- if(cistrncmp(charset, "utf-8", 5) || cistrncmp(charset, "utf8", 4))
- return UTF_8;
- return defcharset;
-}
-
void
rendertext(URLwin *u, Bytes *b)
{
Rune *rurl;
- rurl = toStr((uchar*)u->url, strlen(u->url), ISO_8859_1);
- u->items = parsehtml(b->b, b->n, rurl, u->type, charset((char*)b->b), &u->docinfo);
+ rurl = toStr((uchar*)u->url, strlen(u->url), UTF_8);
+ u->items = parsehtml(b->b, b->n, rurl, u->type, UTF_8, &u->docinfo);
// free(rurl);
rerender(u);
--- a/sys/src/cmd/htmlfmt/main.c
+++ b/sys/src/cmd/htmlfmt/main.c
@@ -8,8 +8,35 @@
char *url = "";
int aflag;
int width = 70;
-int defcharset;
+char *defcharset = "latin1";
+/*
+ * Route fd through /bin/uhtml so the caller reads the document after
+ * character set conversion; defcharset is passed as uhtml's -c argument.
+ * On success fd itself is replaced by the read end of a pipe fed by the
+ * child, so the caller keeps using (and closing) the same descriptor.
+ * If pipe or fork fails, fd is left untouched and the caller reads the
+ * raw input.  Always returns fd.
+ */
+int
+uhtml(int fd)
+{
+	int p[2];
+
+	if(pipe(p) < 0)
+		return fd;
+	switch(fork()){
+	case -1:
+		break;
+	case 0:
+		/* without both redirections the filter would use the wrong files */
+		if(dup(fd, 0) < 0 || dup(p[1], 1) < 0)
+			exits("dup");
+		close(p[1]);
+		close(p[0]);
+		/* keep the source descriptor out of the exec'd program */
+		if(fd > 1)
+			close(fd);
+		execl("/bin/uhtml", "uhtml", "-c", defcharset, nil);
+		/* uhtml could not be started: pass the bytes through unconverted */
+		execl("/bin/cat", "cat", nil);
+		exits("exec");
+	default:
+		/* from here on, reads of fd come from the child's output */
+		dup(p[0], fd);
+		break;
+	}
+	close(p[0]);
+	close(p[1]);
+	return fd;
+}
+
void
usage(void)
{
@@ -21,7 +48,7 @@
main(int argc, char *argv[])
{
int i, fd;
- char *p, *err, *file;
+ char *err, *file;
char errbuf[ERRMAX];
ARGBEGIN{
@@ -29,9 +56,7 @@
aflag++;
break;
case 'c':
- p = smprint("<meta charset=\"%s\">", EARGF(usage()));
- defcharset = charset(p);
- free(p);
+ defcharset = EARGF(usage());
break;
case 'l': case 'w':
err = EARGF(usage());
@@ -50,7 +75,7 @@
err = nil;
file = "<stdin>";
if(argc == 0)
- err = loadhtml(0);
+ err = loadhtml(uhtml(0));
else
for(i=0; err==nil && i<argc; i++){
file = argv[i];
@@ -60,7 +85,7 @@
err = errbuf;
break;
}
- err = loadhtml(fd);
+ err = loadhtml(uhtml(fd));
close(fd);
if(err)
break;