ref: 6c91d99ce29f8cb6c47c0c535c9162f248f1fdb1
parent: 7a29aa57cbc0a5ccf015ded763b54046235275ec
author: cinap_lenrek <cinap_lenrek@centraldogma>
date: Tue Sep 20 00:14:29 EDT 2011
uhtml: remove leading UTF-8 BOM marker, html2ms: fix underline handling and escaping
--- a/sys/src/cmd/html2ms.c
+++ b/sys/src/cmd/html2ms.c
@@ -142,8 +142,24 @@
}
void
-ona(Text *text, Tag *)
+restoreunderline(Text *text, Tag *tag)
{
+ text->underline = tag->restore;
+ emit(text, "");
+}
+
+void
+ona(Text *text, Tag *tag)
+{
+ int i;
+
+ for(i=0; i<tag->nattr; i++)
+ if(cistrcmp(tag->attr[i].attr, "href") == 0)
+ break;
+ if(i == tag->nattr)
+ return;
+ tag->restore = text->underline;
+ tag->close = restoreunderline;
text->underline = 1;
}
@@ -207,10 +223,13 @@
if(n != 7 || cistrncmp(buf, "[CDATA[", 7))
continue;
while((c = Bgetc(&in)) > 0){
- if(c == ']')
- if(Bgetc(&in) == ']')
- if(Bgetc(&in) == '>')
- return;
+ if(c == ']'){
+ if(Bgetc(&in) == ']'){
+ if(Bgetc(&in) != '>')
+ Bungetc(&in);
+ return;
+ }
+ }
}
}
}
@@ -425,10 +444,9 @@
case '\r':
case ' ':
case '\t':
- if(text->pre == 0){
- text->space = 1;
+ text->space = 1;
+ if(text->pre == 0)
continue;
- }
default:
if(r == '\n' || r == '\r')
text->pos = 0;
@@ -435,8 +453,8 @@
if(text->space){
text->space = 0;
if(text->underline){
- emit(text, "");
- text->pos = Bprint(&out, ".UL ");
+ emit(text, ".UL ");
+ text->pos = 1;
} else if(text->pos >= 70){
text->pos = 0;
Bputc(&out, '\n');
@@ -445,16 +463,15 @@
Bputc(&out, ' ');
}
}
- if(text->pos == 0 && r == '.'){
- text->pos++;
- Bputc(&out, ' ');
- }
- text->pos++;
- if(r == 0xA0){
+ if(text->pos == 0 && r == '.')
+ text->pos += Bprint(&out, "\\&");
+ else if(r == '\\')
+ text->pos += Bprint(&out, "\\&\\");
+ else if(r == 0xA0){
r = ' ';
- Bputc(&out, '\\');
+ text->pos += Bprint(&out, "\\");
}
- Bprint(&out, "%C", r);
+ text->pos += Bprint(&out, "%C", r);
}
}
}
@@ -473,7 +490,10 @@
Binit(&out, 1, OWRITE);
memset(&text, 0, sizeof(text));
+
+ text.font = "R";
text.output = 1;
+
parsetext(&text, nil);
emit(&text, "\n");
}
--- a/sys/src/cmd/page.c
+++ b/sys/src/cmd/page.c
@@ -623,9 +623,9 @@
p->data = "lp -dstdout";
p->open = popengs;
}
- else if(cistrncmp(buf, "<?xml", 5) == 0 ||
- cistrncmp(buf, "<!DOCTYPE", 9) == 0 ||
- cistrncmp(buf, "<HTML", 5) == 0){
+ else if(cistrstr(buf, "<?xml") ||
+ cistrstr(buf, "<!DOCTYPE") ||
+ cistrstr(buf, "<HTML")){
p->data = "uhtml | html2ms | troff -ms | lp -dstdout";
p->open = popengs;
}
--- a/sys/src/cmd/uhtml.c
+++ b/sys/src/cmd/uhtml.c
@@ -62,6 +62,11 @@
if((nbuf = read(0, buf, sizeof(buf)-1)) < 0)
sysfatal("read: %r");
buf[nbuf] = 0;
+
+ /* useless BOM marker */
+ if(memcmp(buf, "\xEF\xBB\xBF", 3)==0)
+ memmove(buf, buf+3, nbuf-3);
+
for(;;){
if(s = cistrstr(buf, "encoding="))
if(s = strval(s+9)){